Skip to content

Commit db56894

Browse files
authored
feat: replace hard-coded cache paths with dynamic cache_path config (#952)
* feat: replace hard-coded cache paths with dynamic cache_path config
* style: reorder wait_retry import and format chmod list
* refactor: pass workspace_path to chmod command and use DockerConf check
1 parent d4dbaae commit db56894

File tree

5 files changed

+28
-9
lines changed

5 files changed

+28
-9
lines changed

rdagent/components/coder/data_science/feature/prompts.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ feature_coder:
4646
5. You should use the following cache decorator to cache the results of the function:
4747
```python
4848
from joblib import Memory
49-
memory = Memory(location='./cache', verbose=0)
49+
memory = Memory(location='{% include "scenarios.data_science.share:scen.cache_path" %}', verbose=0)
5050
@memory.cache```
5151
6. Coding tricks:
52-
- If the input consists of a batch of file paths and you need to modify the file contents to complete your feature engineering task, you can accomplish your feature engineering task by modifying these files and creating new files in a subfolder within "./cache" (this path is persistent, otherwise you may lose your created file). Then the new file paths are returned.
52+
- If the input consists of a batch of file paths and you need to modify the file contents to complete your feature engineering task, you can accomplish your feature engineering task by modifying these files and creating new files in a subfolder within "{% include "scenarios.data_science.share:scen.cache_path" %}" (this path is persistent, otherwise you may lose your created file). Then the new file paths are returned.
5353
5454
{% include "scenarios.data_science.share:guidelines.coding" %}
5555

rdagent/components/coder/data_science/model/prompts.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ model_coder:
4343
4. You should use the following cache decorator to cache the results of the function:
4444
```python
4545
from joblib import Memory
46-
memory = Memory(location='./cache', verbose=0)
46+
memory = Memory(location='{% include "scenarios.data_science.share:scen.cache_path" %}', verbose=0)
4747
@memory.cache```
4848
{% include "scenarios.data_science.share:guidelines.coding" %}
4949

rdagent/components/coder/data_science/raw_data_loader/prompts.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ data_loader_coder:
273273
3. You should use the following cache decorator to cache the results of the function:
274274
```python
275275
from joblib import Memory
276-
memory = Memory(location='./cache', verbose=0)
276+
memory = Memory(location='{% include "scenarios.data_science.share:scen.cache_path" %}', verbose=0)
277277
@memory.cache```
278278
{% include "scenarios.data_science.share:guidelines.coding" %}
279279

rdagent/scenarios/data_science/share.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ describe: # some template to describe some object
6161
scen: # customizable
6262
role: |-
6363
You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
64-
input_path: "./input/"
64+
input_path: "./workspace_input/"
65+
cache_path: "./workspace_cache/"
6566

6667
component_description:
6768
DataLoadSpec: |-

rdagent/utils/env.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from rdagent.core.experiment import RD_AGENT_SETTINGS
4141
from rdagent.log import rdagent_logger as logger
4242
from rdagent.oai.llm_utils import md5_hash
43+
from rdagent.utils.agent.tpl import T
4344
from rdagent.utils.workflow import wait_retry
4445

4546

@@ -240,15 +241,32 @@ def run_ret_code(
240241
# FIXME: the input path and cache path are hard coded here.
241242
# We don't want to change the content in input and cache path.
242243
# Otherwise, it may produce large amount of warnings.
244+
def _get_chmod_cmd(workspace_path: str) -> str:
245+
def _get_path_stem(path: str) -> str | None:
246+
# If the input path is relative, keep only the first component
247+
p = Path(path)
248+
if not p.is_absolute() and p.parts:
249+
return p.parts[0]
250+
return None
251+
252+
chmod_cmd = f"chmod -R 777 $(find {workspace_path} -mindepth 1 -maxdepth 1"
253+
for name in [
254+
_get_path_stem(T("scenarios.data_science.share:scen.cache_path").r()),
255+
_get_path_stem(T("scenarios.data_science.share:scen.input_path").r()),
256+
]:
257+
chmod_cmd += f" ! -name {name}"
258+
chmod_cmd += ")"
259+
return chmod_cmd
260+
243261
entry_add_timeout = (
244262
f"/bin/sh -c 'timeout --kill-after=10 {self.conf.running_timeout_period} {entry}; "
245263
+ "entry_exit_code=$?; "
246264
+ (
247-
f"chmod -R 777 $(find {self.conf.mount_path} -mindepth 1 -maxdepth 1 ! -name cache ! -name input); "
265+
f"{_get_chmod_cmd(self.conf.mount_path)}"
248266
# We don't have to change the permission of the cache and input folder to remove it
249267
# + f"if [ -d {self.conf.mount_path}/cache ]; then chmod 777 {self.conf.mount_path}/cache; fi; " +
250268
# f"if [ -d {self.conf.mount_path}/input ]; then chmod 777 {self.conf.mount_path}/input; fi; "
251-
if hasattr(self.conf, "mount_path")
269+
if isinstance(self.conf, DockerConf)
252270
else ""
253271
)
254272
+ "exit $entry_exit_code'"
@@ -409,7 +427,7 @@ def _run_ret_code(
409427
volumes[lp] = rp
410428
cache_path = "/tmp/sample" if "/sample/" in "".join(self.conf.extra_volumes.keys()) else "/tmp/full"
411429
Path(cache_path).mkdir(parents=True, exist_ok=True)
412-
volumes[cache_path] = "./cache"
430+
volumes[cache_path] = T("scenarios.data_science.share:scen.cache_path").r()
413431
for lp, rp in running_extra_volume.items():
414432
volumes[lp] = rp
415433

@@ -821,7 +839,7 @@ def _run_ret_code(
821839
volumes[lp] = {"bind": rp, "mode": self.conf.extra_volume_mode}
822840
cache_path = "/tmp/sample" if "/sample/" in "".join(self.conf.extra_volumes.keys()) else "/tmp/full"
823841
Path(cache_path).mkdir(parents=True, exist_ok=True)
824-
volumes[cache_path] = {"bind": "./cache", "mode": "rw"}
842+
volumes[cache_path] = {"bind": T("scenarios.data_science.share:scen.cache_path").r(), "mode": "rw"}
825843
for lp, rp in running_extra_volume.items():
826844
volumes[lp] = {"bind": rp, "mode": self.conf.extra_volume_mode}
827845

0 commit comments

Comments
 (0)