hjh0119
diff --git a/docs/source/LLM/支持的模型和数据集.md b/docs/source/LLM/支持的模型和数据集.md
Lines changed: 52 additions & 44 deletions
diff --git a/docs/source_en/LLM/Supported-models-datasets.md b/docs/source_en/LLM/Supported-models-datasets.md
Lines changed: 52 additions & 44 deletions
diff --git a/scripts/utils/run_dataset_info.py b/scripts/utils/run_dataset_info.py
Lines changed: 1 addition & 1 deletion
diff --git a/scripts/utils/run_model_info.py b/scripts/utils/run_model_info.py
Lines changed: 37 additions & 21 deletions
@@ -52,7 +52,7 @@ def write_dataset_info() -> None:
             dataset_info = DATASET_MAPPING[dataset_name]
             tags = dataset_info.get('tags', [])
             subsets = dataset_info.get('subsets', [])
-            subsets = ','.join(subsets)
+            subsets = '<br>'.join(subsets)
             if 'audio' in tags:
                 template = mapping['audio']
             elif 'vision' in tags:
 
@@ -5,13 +5,16 @@
 
 def get_model_info_table() -> List[str]:
     fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
-    end_words = ['## 数据集', '## Datasets']
+    end_words = [['### 多模态大模型', '## 数据集'], ['### MLLM', '## Datasets']]
     model_name_list = ModelType.get_model_name_list()
-    result = ('| Model Type | Model ID | Default Lora Target Modules | Default Template |'
-              ' Support Flash Attn | Support VLLM | Requires | Tags | HF Model ID |\n'
-              '| ---------  | -------- | --------------------------- | ---------------- |'
-              ' ------------------ | ------------ | -------- | ---- | ----------- |\n')
-    res: List[str] = []
+    result = [
+        '| Model Type | Model ID | Default Lora Target Modules | Default Template |'
+        ' Support Flash Attn | Support VLLM | Requires | Tags | HF Model ID |\n'
+        '| ---------  | -------- | --------------------------- | ---------------- |'
+        ' ------------------ | ------------ | -------- | ---- | ----------- |\n'
+    ] * 2
+    res_llm: List[str] = []
+    res_mllm: List[str] = []
     bool_mapping = {True: '&#x2714;', False: '&#x2718;'}
     for model_name in model_name_list:
         model_info = MODEL_MAPPING[model_name]
@@ -24,6 +27,11 @@ def get_model_info_table() -> List[str]:
         support_vllm = bool_mapping[support_vllm]
         requires = ', '.join(model_info['requires'])
         tags = model_info.get('tags', [])
+        if 'multi-modal' in tags:
+            tags.remove('multi-modal')
+            is_multi_modal = True
+        else:
+            is_multi_modal = False
         tags_str = ', '.join(tags)
         if len(tags_str) == 0:
             tags_str = '-'
@@ -34,24 +42,32 @@ def get_model_info_table() -> List[str]:
             model_name, model_id, lora_target_modules, template, support_flash_attn, support_vllm, requires, tags_str,
             hf_model_id
         ]
-        res.append(r)
-    text = ''
-    for r in res:
-        ms_url = f'https://modelscope.cn/models/{r[1]}/summary'
-        if r[8] != '-':
-            hf_url = f'https://huggingface.co/{r[8]}'
-            hf_model_id_str = f'[{r[8]}]({hf_url})'
+        if is_multi_modal:
+            res_mllm.append(r)
         else:
-            hf_model_id_str = '-'
-        text += f'|{r[0]}|[{r[1]}]({ms_url})|{r[2]}|{r[3]}|{r[4]}|{r[5]}|{r[6]}|{r[7]}|{hf_model_id_str}|\n'
-    print(f'模型总数: {len(res)}')
-    result += text
-    for idx, fpath in enumerate(fpaths):
+            res_llm.append(r)
+    print(f'LLM总数: {len(res_llm)}, MLLM总数: {len(res_mllm)}')
+    text = ['', '']  # llm, mllm
+    for i, res in enumerate([res_llm, res_mllm]):
+        for r in res:
+            ms_url = f'https://modelscope.cn/models/{r[1]}/summary'
+            if r[8] != '-':
+                hf_url = f'https://huggingface.co/{r[8]}'
+                hf_model_id_str = f'[{r[8]}]({hf_url})'
+            else:
+                hf_model_id_str = '-'
+            text[i] += f'|{r[0]}|[{r[1]}]({ms_url})|{r[2]}|{r[3]}|{r[4]}|{r[5]}|{r[6]}|{r[7]}|{hf_model_id_str}|\n'
+        result[i] += text[i]
+
+    for i, fpath in enumerate(fpaths):
         with open(fpath, 'r') as f:
             text = f.read()
-        start_idx = text.find('| Model Type |')
-        end_idx = text.find(end_words[idx])
-        output = text[:start_idx] + result + '\n\n' + text[end_idx:]
+        llm_start_idx = text.find('| Model Type |')
+        mllm_start_idx = text[llm_start_idx + 1:].find('| Model Type |') + llm_start_idx + 1
+        llm_end_idx = text.find(end_words[i][0])
+        mllm_end_idx = text.find(end_words[i][1])
+        output = text[:llm_start_idx] + result[0] + '\n\n' + text[llm_end_idx:mllm_start_idx] + result[
+            1] + '\n\n' + text[mllm_end_idx:]
         with open(fpath, 'w') as f:
             text = f.write(output)
     return res