|
22 | 22 | from rdagent.components.coder.data_science.workflow.exp import WorkflowTask
|
23 | 23 | from rdagent.components.workflow.conf import BasePropSetting
|
24 | 24 | from rdagent.components.workflow.rd_loop import RDLoop
|
| 25 | +from rdagent.core.conf import RD_AGENT_SETTINGS |
25 | 26 | from rdagent.core.exception import CoderError, RunnerError
|
26 | 27 | from rdagent.core.proposal import ExperimentFeedback
|
27 | 28 | from rdagent.core.scenario import Scenario
|
@@ -175,21 +176,46 @@ def record(self, prev_out: dict[str, Any]):
|
175 | 176 | and Path(DS_RD_SETTING.log_archive_path).is_dir()
|
176 | 177 | ):
|
177 | 178 | start_archive_datetime = datetime.now()
|
178 |
| - logger.info(f"Archiving log folder after loop {self.loop_idx}") |
179 |
| - tar_path = ( |
| 179 | + logger.info(f"Archiving log and workspace folder after loop {self.loop_idx}") |
| 180 | + mid_log_tar_path = ( |
180 | 181 | Path(
|
181 | 182 | DS_RD_SETTING.log_archive_temp_path
|
182 | 183 | if DS_RD_SETTING.log_archive_temp_path
|
183 | 184 | else DS_RD_SETTING.log_archive_path
|
184 | 185 | )
|
185 | 186 | / "mid_log.tar"
|
186 | 187 | )
|
187 |
| - subprocess.run(["tar", "-cf", str(tar_path), "-C", (Path().cwd() / "log"), "."], check=True) |
| 188 | + mid_workspace_tar_path = ( |
| 189 | + Path( |
| 190 | + DS_RD_SETTING.log_archive_temp_path |
| 191 | + if DS_RD_SETTING.log_archive_temp_path |
| 192 | + else DS_RD_SETTING.log_archive_path |
| 193 | + ) |
| 194 | + / "mid_workspace.tar" |
| 195 | + ) |
| 196 | + subprocess.run(["tar", "-cf", str(mid_log_tar_path), "-C", (Path().cwd() / "log"), "."], check=True) |
| 197 | + |
| 198 | + # remove all files and folders in the workspace except for .py, .md, and .csv files to avoid large workspace dump |
| 199 | + for workspace_id in Path(RD_AGENT_SETTINGS.workspace_path).iterdir(): |
| 200 | + for file_and_folder in workspace_id.iterdir(): |
| 201 | + if file_and_folder.is_dir(): |
| 202 | + shutil.rmtree(file_and_folder) |
| 203 | + elif file_and_folder.is_file() and file_and_folder.suffix not in [".py", ".md", ".csv"]: |
| 204 | + file_and_folder.unlink() |
| 205 | + |
| 206 | + subprocess.run( |
| 207 | + ["tar", "-cf", str(mid_workspace_tar_path), "-C", (RD_AGENT_SETTINGS.workspace_path), "."], check=True |
| 208 | + ) |
188 | 209 | if DS_RD_SETTING.log_archive_temp_path is not None:
|
189 |
| - shutil.move(tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_log.tar") |
190 |
| - tar_path = Path(DS_RD_SETTING.log_archive_path) / "mid_log.tar" |
| 210 | + shutil.move(mid_log_tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_log.tar") |
| 211 | + mid_log_tar_path = Path(DS_RD_SETTING.log_archive_path) / "mid_log.tar" |
| 212 | + shutil.move(mid_workspace_tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_workspace.tar") |
| 213 | + mid_workspace_tar_path = Path(DS_RD_SETTING.log_archive_path) / "mid_workspace.tar" |
| 214 | + shutil.copy( |
| 215 | + mid_log_tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_log_bak.tar" |
| 216 | + ) # backup when upper code line is killed when running |
191 | 217 | shutil.copy(
|
192 |
| - tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_log_bak.tar" |
| 218 | + mid_workspace_tar_path, Path(DS_RD_SETTING.log_archive_path) / "mid_workspace_bak.tar" |
193 | 219 | ) # backup when upper code line is killed when running
|
194 | 220 | self.timer.add_duration(datetime.now() - start_archive_datetime)
|
195 | 221 |
|
|
0 commit comments