11import json
22from tqdm import tqdm
3+ from typing import Any , Dict
34
45from evalscope .benchmarks import DataAdapter
56
# Translation table for the per-dataset detail section of the generated
# Markdown. Outer keys identify a UI string; inner keys are language codes
# ('zh' for Chinese, 'en' for English).
DATASET_DETAIL_LOCALE = {
    'back_to_top': {'zh': '返回目录', 'en': 'Back to Top'},
    'toc_title': {'zh': 'LLM评测集', 'en': 'LLM Benchmarks'},
    'dataset_name': {'zh': '数据集名称', 'en': 'Dataset Name'},
    'dataset_id': {'zh': '数据集ID', 'en': 'Dataset ID'},
    'description': {'zh': '数据集描述', 'en': 'Description'},
    'task_categories': {'zh': '任务类别', 'en': 'Task Categories'},
    'evaluation_metrics': {'zh': '评估指标', 'en': 'Evaluation Metrics'},
    'requires_llm_judge': {'zh': '需要LLM Judge', 'en': 'Requires LLM Judge'},
    'default_shots': {'zh': '默认提示方式', 'en': 'Default Shots'},
    'subsets': {'zh': '数据集子集', 'en': 'Subsets'},
    'supported_output_formats': {'zh': '支持输出格式', 'en': 'Supported Output Formats'},
    'extra_parameters': {'zh': '额外参数', 'en': 'Extra Parameters'},
    'system_prompt': {'zh': '系统提示词', 'en': 'System Prompt'},
    'prompt_template': {'zh': '提示模板', 'en': 'Prompt Template'},
    'yes': {'zh': '是', 'en': 'Yes'},
    'no': {'zh': '否', 'en': 'No'},
    'no_description': {'zh': '暂无详细描述', 'en': 'No detailed description available'},
}
78+
# Translation table for the document-level scaffolding (title, intro line,
# index-table headers, details heading). Inner keys are language codes
# ('zh' for Chinese, 'en' for English).
DOCUMENT_LOCALE = {
    'title': {'zh': 'LLM评测集', 'en': 'LLM Benchmarks'},
    'intro': {
        'zh': '以下是支持的LLM评测集列表,点击数据集标准名称可跳转详细信息。',
        'en': 'Below is the list of supported LLM benchmarks. Click on a benchmark name to jump to details.',
    },
    'dataset_name': {'zh': '数据集名称', 'en': 'Benchmark Name'},
    'pretty_name': {'zh': '标准名称', 'en': 'Pretty Name'},
    'task_categories': {'zh': '任务类别', 'en': 'Task Categories'},
    'details_title': {'zh': '数据集详情', 'en': 'Benchmark Details'},
}
6105
7106def wrap_key_words (keywords : list [str ]) -> str :
8107 """
@@ -26,129 +125,33 @@ def process_dictionary(data: dict) -> str:
26125 """
27126 return json .dumps (data , ensure_ascii = False , indent = 4 )
28127
def get_dataset_detail_locale(lang: str) -> Dict[str, str]:
    """Return the dataset-detail UI strings for one language.

    Flattens ``DATASET_DETAIL_LOCALE`` from ``{key: {lang: text}}`` to
    ``{key: text}`` for the requested language.

    Args:
        lang (str): Language code used as the inner key of
            ``DATASET_DETAIL_LOCALE`` (e.g. 'zh' or 'en').

    Returns:
        Dict[str, str]: Mapping from string identifier to localized text.

    Raises:
        KeyError: If any entry has no translation for ``lang``. The message
            names both the language code and the offending entry.
    """
    localized = {}
    for key, translations in DATASET_DETAIL_LOCALE.items():
        # Fail with a descriptive message instead of a bare KeyError(lang),
        # so a typo in the language code or a missing translation is obvious.
        if lang not in translations:
            raise KeyError(f'No {lang!r} translation for {key!r} in DATASET_DETAIL_LOCALE')
        localized[key] = translations[lang]
    return localized
29131
30- def generate_dataset_markdown (data_adapter : DataAdapter ) -> str :
31- """
32- 根据DataAdapter实例生成美观的Markdown数据集介绍
33-
34- Args:
35- data_adapter (DataAdapter): 数据集适配器实例
36-
37- Returns:
38- str: 格式化的Markdown字符串
39- """
40- # 获取基础信息
41- name = data_adapter .name
42- pretty_name = data_adapter .pretty_name or name
43- dataset_id = data_adapter .dataset_id
44- description = data_adapter .description or '暂无详细描述'
45-
46-
47- # 处理数据集ID的链接格式
48- if dataset_id .startswith (('http://' , 'https://' )):
49- dataset_id_md = f'[{ dataset_id } ]({ dataset_id } )'
50- elif '/' in dataset_id : # ModelScope格式的ID
51- dataset_id_md = f'[{ dataset_id } ](https://modelscope.cn/datasets/{ dataset_id } /summary)'
52- else :
53- dataset_id_md = dataset_id
54-
55- # 构建详情部分
56- details = [
57- f'### { pretty_name } ' ,
58- '' ,
59- f'[返回目录](#llm评测集)' ,
60- f'- **数据集名称**: `{ name } `' ,
61- f'- **数据集ID**: { dataset_id_md } ' ,
62- f'- **数据集描述**: \n > { description } ' ,
63- f'- **任务类别**: { wrap_key_words (data_adapter .tags )} ' ,
64- f'- **评估指标**: { wrap_key_words (data_adapter .metric_list )} ' ,
65- f"- **需要LLM Judge**: { '是' if data_adapter .llm_as_a_judge else '否' } " ,
66- f'- **默认提示方式**: { data_adapter .few_shot_num } -shot'
67- ]
68-
69- # 添加数据集子集信息
70- if data_adapter .subset_list :
71- details .append (f'- **数据集子集**: { wrap_key_words (data_adapter .subset_list )} ' )
72-
73- # 添加其他技术信息
74- technical_info = [
75- f'- **支持输出格式**: { wrap_key_words (data_adapter .output_types )} ' ,
76- ]
77-
78- # 添加额外参数信息
79- extra_params = data_adapter .config_kwargs .get ('extra_params' , {})
80- if extra_params :
81- technical_info .append (f'- **额外参数**: \n ```json\n { process_dictionary (extra_params )} \n ```' )
82-
83- # 添加提示模板
84- if data_adapter .system_prompt :
85- technical_info .append (f'- **系统提示词**: \n ```text\n { data_adapter .system_prompt } \n ```' )
86- if data_adapter .prompt_template :
87- technical_info .append (f'- **提示模板**: \n ```text\n { data_adapter .prompt_template } \n ```' )
def get_document_locale(lang: str) -> Dict[str, str]:
    """Return the document-structure UI strings for one language.

    Flattens ``DOCUMENT_LOCALE`` from ``{key: {lang: text}}`` to
    ``{key: text}`` for the requested language.

    Args:
        lang (str): Language code used as the inner key of
            ``DOCUMENT_LOCALE`` (e.g. 'zh' or 'en').

    Returns:
        Dict[str, str]: Mapping from string identifier to localized text.

    Raises:
        KeyError: If any entry has no translation for ``lang``. The message
            names both the language code and the offending entry.
    """
    localized = {}
    for key, translations in DOCUMENT_LOCALE.items():
        # Fail with a descriptive message instead of a bare KeyError(lang),
        # so a typo in the language code or a missing translation is obvious.
        if lang not in translations:
            raise KeyError(f'No {lang!r} translation for {key!r} in DOCUMENT_LOCALE')
        localized[key] = translations[lang]
    return localized
88135
89- return '\n ' .join (details + ['' ] + technical_info + ['' ])
90-
91-
92- def generate_full_documentation (adapters : list [DataAdapter ]) -> str :
93- """
94- 生成完整的Markdown文档,包含索引和所有数据集详情
95-
96- Args:
97- adapters (list[DataAdapter]): DataAdapter实例列表
98-
99- Returns:
100- str: 完整的Markdown文档
101- """
102- # 生成索引
103- index = [
104- '# LLM评测集' ,
105- '' ,
106- '以下是支持的LLM评测集列表,点击数据集标准名称可跳转详细信息。' ,
107- '' ,
108- '| 数据集名称 | 标准名称 | 任务类别 |' ,
109- '|------------|----------|----------|' ,
110- ]
111-
112- for adapter in adapters :
113- name = adapter .name
114- pretty_name = adapter .pretty_name or name
115- link_name = pretty_name .lower ().replace (' ' , '-' ).replace ('.' , '' )
116- tags = wrap_key_words (adapter .tags )
117- index .append (f'| `{ name } ` | [{ pretty_name } ](#{ link_name } ) | { tags } |' )
118-
119- # 生成详情部分
120- details = [
121- '' ,
122- '---' ,
123- '' ,
124- '## 数据集详情' ,
125- ''
126- ]
127-
128- for i , adapter in enumerate (adapters ):
129- details .append (generate_dataset_markdown (adapter ))
130- if i < len (adapters ) - 1 :
131- details .append ('---' )
132- details .append ('' )
133-
134- return '\n ' .join (index + details )
135-
136-
137- def generate_dataset_markdown_en (data_adapter : DataAdapter ) -> str :
136+ def generate_dataset_markdown (data_adapter : DataAdapter , lang : str = 'zh' ) -> str :
138137 """
139138 Generate a well-formatted Markdown benchmark introduction based on a DataAdapter instance
140139
141140 Args:
142141 data_adapter (DataAdapter): Dataset adapter instance
142+ lang (str): Language code ('zh' for Chinese, 'en' for English)
143143
144144 Returns:
145145 str: Formatted Markdown string
146146 """
147+ # Get localized text
148+ text = get_dataset_detail_locale (lang )
149+
147150 # Get basic information
148151 name = data_adapter .name
149152 pretty_name = data_adapter .pretty_name or name
150153 dataset_id = data_adapter .dataset_id
151- description = data_adapter .description or 'No detailed description available'
154+ description = data_adapter .description or text [ 'no_description' ]
152155
153156 # Format dataset ID links
154157 if dataset_id .startswith (('http://' , 'https://' )):
@@ -162,57 +165,60 @@ def generate_dataset_markdown_en(data_adapter: DataAdapter) -> str:
162165 details = [
163166 f'### { pretty_name } ' ,
164167 '' ,
165- f'[Back to Top](#llm-benchmarks )' ,
166- f'- **Dataset Name **: `{ name } `' ,
167- f'- **Dataset ID **: { dataset_id_md } ' ,
168- f'- **Description **: \n > { description } ' ,
169- f'- **Task Categories **: { wrap_key_words (data_adapter .tags )} ' ,
170- f'- **Evaluation Metrics **: { wrap_key_words (data_adapter .metric_list )} ' ,
171- f" - **Requires LLM Judge **: { 'Yes' if data_adapter .llm_as_a_judge else 'No' } " ,
172- f'- **Default Shots **: { data_adapter .few_shot_num } -shot'
168+ f'[{ text [ "back_to_top" ] } ](# { text [ "toc_title" ]. lower (). replace ( " " , "-" ) } )' ,
169+ f'- **{ text [ "dataset_name" ] } **: `{ name } `' ,
170+ f'- **{ text [ "dataset_id" ] } **: { dataset_id_md } ' ,
171+ f'- **{ text [ "description" ] } **: \n > { description } ' ,
172+ f'- **{ text [ "task_categories" ] } **: { wrap_key_words (data_adapter .tags )} ' ,
173+ f'- **{ text [ "evaluation_metrics" ] } **: { wrap_key_words (data_adapter .metric_list )} ' ,
174+ f' - **{ text [ "requires_llm_judge" ] } **: { text [ "yes" ] if data_adapter .llm_as_a_judge else text [ "no" ] } ' ,
175+ f'- **{ text [ "default_shots" ] } **: { data_adapter .few_shot_num } -shot'
173176 ]
174177
175178 # Add dataset subsets
176179 if data_adapter .subset_list :
177- details .append (f'- **Subsets **: { wrap_key_words (data_adapter .subset_list )} ' )
180+ details .append (f'- **{ text [ "subsets" ] } **: { wrap_key_words (data_adapter .subset_list )} ' )
178181
179182 # Add technical information
180183 technical_info = [
181- f'- **Supported Output Formats **: { wrap_key_words (data_adapter .output_types )} ' ,
184+ f'- **{ text [ "supported_output_formats" ] } **: { wrap_key_words (data_adapter .output_types )} ' ,
182185 ]
183186
184187 # Add extra parameters
185188 extra_params = data_adapter .config_kwargs .get ('extra_params' , {})
186189 if extra_params :
187- technical_info .append (f'- **Extra Parameters **: \n ```json\n { process_dictionary (extra_params )} \n ```' )
190+ technical_info .append (f'- **{ text [ "extra_parameters" ] } **: \n ```json\n { process_dictionary (extra_params )} \n ```' )
188191
189192 # Add prompt templates
190193 if data_adapter .system_prompt :
191- technical_info .append (f'- **System Prompt **: \n ```text\n { data_adapter .system_prompt } \n ```' )
194+ technical_info .append (f'- **{ text [ "system_prompt" ] } **: \n ```text\n { data_adapter .system_prompt } \n ```' )
192195 if data_adapter .prompt_template :
193- technical_info .append (f'- **Prompt Template **: \n ```text\n { data_adapter .prompt_template } \n ```' )
196+ technical_info .append (f'- **{ text [ "prompt_template" ] } **: \n ```text\n { data_adapter .prompt_template } \n ```' )
194197
195198 return '\n ' .join (details + ['' ] + technical_info + ['' ])
196199
197-
198- def generate_full_documentation_en (adapters : list [DataAdapter ]) -> str :
200+ def generate_full_documentation (adapters : list [DataAdapter ], lang : str = 'zh' ) -> str :
199201 """
200202 Generate complete Markdown documentation with index and all benchmark details
201203
202204 Args:
203205 adapters (list[DataAdapter]): List of DataAdapter instances
206+ lang (str): Language code ('zh' for Chinese, 'en' for English)
204207
205208 Returns:
206209 str: Complete Markdown document
207210 """
211+ # Get localized text
212+ text = get_document_locale (lang )
213+
208214 # Generate index
209215 index = [
210- '# LLM Benchmarks ' ,
216+ f '# { text [ "title" ] } ' ,
211217 '' ,
212- 'Below is the list of supported LLM benchmarks. Click on a benchmark name to jump to details. ' ,
218+ f' { text [ "intro" ] } ' ,
213219 '' ,
214- '| Benchmark Name | Pretty Name | Task Categories |' ,
215- '|---------------- |-------------|------ ----------|' ,
220+ f '| { text [ "dataset_name" ] } | { text [ "pretty_name" ] } | { text [ "task_categories" ] } |' ,
221+ '|------------|----------| ----------|' ,
216222 ]
217223
218224 for adapter in adapters :
@@ -227,12 +233,12 @@ def generate_full_documentation_en(adapters: list[DataAdapter]) -> str:
227233 '' ,
228234 '---' ,
229235 '' ,
230- '## Benchmark Details ' ,
236+ f '## { text [ "details_title" ] } ' ,
231237 ''
232238 ]
233239
234240 for i , adapter in enumerate (adapters ):
235- details .append (generate_dataset_markdown_en (adapter ))
241+ details .append (generate_dataset_markdown (adapter , lang ))
236242 if i < len (adapters ) - 1 :
237243 details .append ('---' )
238244 details .append ('' )
@@ -253,8 +259,8 @@ def generate_full_documentation_en(adapters: list[DataAdapter]) -> str:
253259 adapters .sort (key = lambda x : x .name ) # 按名称排序
254260
255261 # 生成完整文档
256- markdown_doc = generate_full_documentation (adapters )
257- markdown_doc_en = generate_full_documentation_en (adapters )
262+ markdown_doc = generate_full_documentation (adapters , 'zh' )
263+ markdown_doc_en = generate_full_documentation (adapters , 'en' )
258264
259265 # 输出到文件
260266 with open ('docs/zh/get_started/supported_dataset/llm.md' , 'w' , encoding = 'utf-8' ) as f :
0 commit comments