@@ -70,6 +70,26 @@ def replace_tab_item(tab_match):
7070 # 替换所有tab-set
7171 return pattern .sub (replace_tab_set , content )
7272
def remove_identifier_lines(content):
    """Remove the first identifier (label) line of the form ``(name)=``.

    The whole text is searched for the first line that consists only of
    ``(...)=`` followed by optional whitespace. That line is removed together
    with the newline that terminates it. Because ``\\s*`` in the pattern also
    matches newlines, blank lines directly following the label are removed
    as well. Later identifier lines are left untouched.

    Args:
        content: Markdown text to clean.

    Returns:
        The text without the first identifier line, or ``content`` unchanged
        when no such line exists.
    """
    pattern = re.compile(r'^\(.*\)=\s*$', re.MULTILINE)

    match = pattern.search(content)
    if match is None:
        return content

    start, end = match.span()

    # The match stops just before a line break; consume that line break too
    # so removing the label does not leave an empty line behind.
    if content.startswith('\n', end):
        end += 1

    return content[:start] + content[end:]
92+
7393def process_markdown_file (source_path , target_path ):
7494 """处理单个Markdown文件"""
7595 try :
@@ -81,6 +101,7 @@ def process_markdown_file(source_path, target_path):
81101 content = process_code_blocks (content )
82102 content = process_admonitions (content )
83103 content = process_tab_sets (content )
104+ content = remove_identifier_lines (content )
84105
85106 # 确保目标目录存在,并写入处理后的内容
86107 os .makedirs (os .path .dirname (target_path ), exist_ok = True )
@@ -91,23 +112,34 @@ def process_markdown_file(source_path, target_path):
91112 print (f'处理文件 { source_path } 时出错: { e } ' )
92113 return False
93114
94- def get_markdown_files (source_dir ):
115+
def get_markdown_files(source_dir, skip_dirs=None):
    """Collect the Markdown files under ``source_dir`` that need processing.

    Every ``.md`` file except those named ``index.md`` is returned. Directories
    whose name exactly equals an entry of ``skip_dirs`` are not descended
    into, at any depth. Matching is done on the directory name only, so
    ``blogger`` is not affected by skipping ``blog``. ``source_dir`` itself is
    always scanned, whatever its path looks like.

    Args:
        source_dir: Root directory to walk.
        skip_dirs: Optional iterable of directory names to exclude.

    Returns:
        A list of file paths (``source_dir`` joined with the relative path),
        in the order produced by ``os.walk``.
    """
    excluded = frozenset(skip_dirs) if skip_dirs else frozenset()
    markdown_files = []

    for root, dirs, files in os.walk(source_dir):
        # Prune in place: os.walk (top-down) only descends into the
        # directories that remain in ``dirs``, so excluded subtrees are
        # never visited at all.
        dirs[:] = [d for d in dirs if d not in excluded]

        markdown_files.extend(
            os.path.join(root, name)
            for name in files
            if name.endswith('.md') and name != 'index.md'
        )

    return markdown_files
103135
104- def main (source_dir , target_dir ):
105-
136+ def main (source_dir , target_dir , skip_dirs = None ):
137+
106138 print (f'开始处理目录: { source_dir } ' )
107139 print (f'输出目录: { target_dir } ' )
108140
109141 # 获取所有需要处理的Markdown文件
110- markdown_files = get_markdown_files (source_dir )
142+ markdown_files = get_markdown_files (source_dir , skip_dirs = skip_dirs )
111143 print (f'找到 { len (markdown_files )} 个需要处理的Markdown文件' )
112144
113145 # 使用tqdm显示进度条
@@ -124,7 +156,19 @@ def main(source_dir, target_dir):
124156 print (f'处理完成!成功处理 { success_count } /{ len (markdown_files )} 个文件' )
125157 print (f'处理后的文件已保存到: { target_dir } ' )
126158
159+
def process_zh(source_directory='/Users/yunlin/Code/eval-scope/docs/zh',
               target_directory='/Users/yunlin/Code/documentation/tutorial/模型评测',
               skip_dirs=None):
    """Run ``main`` on the Chinese documentation tree.

    All arguments are optional; calling ``process_zh()`` uses the original
    hard-coded locations and skip set.

    Args:
        source_directory: Directory containing the Chinese Markdown sources.
        target_directory: Directory passed to ``main`` as the output location.
        skip_dirs: Directory names to exclude. ``None`` selects the default
            ``{'blog', 'experiments'}``; pass an empty set to skip nothing.
    """
    if skip_dirs is None:
        skip_dirs = {'blog', 'experiments'}
    main(source_directory, target_directory, skip_dirs)
165+
def process_en(source_directory='/Users/yunlin/Code/eval-scope/docs/en',
               target_directory='/Users/yunlin/Code/documentation/tutorial-en/Model Evaluation',
               skip_dirs=None):
    """Run ``main`` on the English documentation tree.

    All arguments are optional; calling ``process_en()`` uses the original
    hard-coded locations and skip set.

    Args:
        source_directory: Directory containing the English Markdown sources.
        target_directory: Directory passed to ``main`` as the output location.
        skip_dirs: Directory names to exclude. ``None`` selects the default
            ``{'blog', 'experiments'}``; pass an empty set to skip nothing.
    """
    if skip_dirs is None:
        skip_dirs = {'blog', 'experiments'}
    main(source_directory, target_directory, skip_dirs)
171+
if __name__ == '__main__':
    # Script entry point: run the Chinese conversion first, then the English one.
    for run_conversion in (process_zh, process_en):
        run_conversion()
0 commit comments