feat: parallel loop running based on asyncio #932

Merged
merged 48 commits
Jun 12, 2025
94f9b28
refactor: split workflow into pkg, add WorkflowTracker & wait_retry
you-n-g Jun 4, 2025
045915c
feat: add async LoopBase with parallel workers and step semaphores
you-n-g Jun 4, 2025
987e084
fix: replace pickle with dill and run blocking tasks via joblib wrapper
you-n-g Jun 5, 2025
55bd378
feat: add log format settings, dynamic parallelism & pickle-based sna…
you-n-g Jun 6, 2025
c4246b4
fix: default step semaphore to 1 and avoid subprocess when single worker
you-n-g Jun 6, 2025
0214af6
Merge remote-tracking branch 'origin/HEAD' into multi-proc
you-n-g Jun 6, 2025
8ddf84a
merge bowen's changes
you-n-g Jun 6, 2025
f8135e1
merge tim's changes
you-n-g Jun 6, 2025
368d4bc
refactor: extract component task mapping, add conditional logger setup
you-n-g Jun 6, 2025
5418369
lint
you-n-g Jun 6, 2025
cd75559
refactor: add type hints and safer remain_time metric logging in work…
you-n-g Jun 6, 2025
e751a09
lint
you-n-g Jun 6, 2025
0b5c749
fix: allow BadRequestError to be pickled via custom copyreg reducer
you-n-g Jun 7, 2025
1bfae17
fix: stop loop when LoopTerminationError is raised in LoopBase
you-n-g Jun 7, 2025
1e33f31
lint
you-n-g Jun 7, 2025
9aa7ea0
refactor: make log tag context-local using ContextVar for thread safety
you-n-g Jun 7, 2025
94269cf
feat: add subproc_step flag and helper to decide subprocess execution
you-n-g Jun 9, 2025
1c24439
fix: use ./cache path and normalize relative volume bind paths
you-n-g Jun 9, 2025
24a9220
fix: reset loop_idx to 0 on loop restart/resume to ensure correct flow
you-n-g Jun 9, 2025
3251f73
Merge remote-tracking branch 'origin/HEAD' into multi-proc
you-n-g Jun 9, 2025
9dd7fe9
Merge remote-tracking branch 'origin/HEAD' into multi-proc
you-n-g Jun 9, 2025
17301be
fix: avoid chmod on cache and input dirs in Env timeout wrapper
you-n-g Jun 9, 2025
e9944ad
fix: skip chmod on 'cache' and 'input' dirs using find -prune
you-n-g Jun 9, 2025
49683f0
fix: restrict chmod to immediate mount dirs excluding cache/input
you-n-g Jun 9, 2025
2e6f613
fix: chmod cache and input dirs alongside their contents after entry run
you-n-g Jun 9, 2025
688c593
fix: guard chmod with directory checks for cache and input
you-n-g Jun 9, 2025
4a18de2
fix: prefix mount_path in chmod command for cache/input dirs
you-n-g Jun 9, 2025
507f719
fix: drop quotes from find exclude patterns to ensure chmod executes
you-n-g Jun 9, 2025
49e11a7
fix: skip chmod on cache/input directories to avoid warning spam
you-n-g Jun 9, 2025
bd97be6
feat: support string volume mappings and poll subprocess stdout/stderr
you-n-g Jun 9, 2025
9422183
support remove symbolic link
you-n-g Jun 9, 2025
08b9441
test: use dynamic home path and code volume in LocalEnv local_simple
you-n-g Jun 9, 2025
f36e29e
fix: skip trace and progress update when loop step is withdrawn
you-n-g Jun 10, 2025
f66093c
refactor: add clean_workspace util and non-destructive workspace backup
you-n-g Jun 10, 2025
0c86eb5
fix: preserve symlinks when backing up workspace with copytree
you-n-g Jun 10, 2025
6ae65bb
fix: prevent AttributeError when _pbar not yet initialized in LoopBase
you-n-g Jun 10, 2025
0819027
perf: replace shutil.copytree with rsync for faster workspace backup
you-n-g Jun 10, 2025
b87a7dc
fix: cast log directory Path to str in tar command of data science loop
you-n-g Jun 10, 2025
86b737f
fix: use portable 'cp -r -P' instead of rsync for workspace backup
you-n-g Jun 10, 2025
953254a
fix: add retry and logging to workspace backup for robustness
you-n-g Jun 11, 2025
9674dce
refactor: extract backup_folder helper and reuse in DataScienceRDLoop
you-n-g Jun 11, 2025
130b118
fix: propagate backup errors & default _pbar getattr to avoid error
you-n-g Jun 11, 2025
e012f69
fix the division by zero bug
Jun 11, 2025
eab79f3
refactor: execute RD loops via asyncio.run and add necessary imports
you-n-g Jun 11, 2025
cf1f163
lint
you-n-g Jun 11, 2025
4a158ec
lint
you-n-g Jun 11, 2025
1ed7611
lint
you-n-g Jun 11, 2025
5e19f25
Merge remote-tracking branch 'origin/HEAD' into multi-proc
you-n-g Jun 12, 2025
4 changes: 3 additions & 1 deletion rdagent/app/data_mining/model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import asyncio

import fire

from rdagent.app.data_mining.conf import MED_PROP_SETTING
@@ -24,7 +26,7 @@ def main(path=None, step_n=None, loop_n=None, all_duration=None, checkout=True):
model_loop = ModelRDLoop(MED_PROP_SETTING)
else:
model_loop = ModelRDLoop.load(path, checkout=checkout)
model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration)
asyncio.run(model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration))


if __name__ == "__main__":
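The change repeated across these entry points is the same one-liner: `run()` is now a coroutine, so callers wrap it in `asyncio.run`. A minimal sketch of the idiom (class and argument names are illustrative, not the real `ModelRDLoop` API):

```python
import asyncio

class Loop:
    async def run(self, step_n=None):
        # Formerly a blocking method; as a coroutine, several loops can
        # now be scheduled concurrently on one event loop.
        await asyncio.sleep(0)  # yield control to other tasks
        return f"ran with step_n={step_n}"

def main(step_n=None):
    loop = Loop()
    # asyncio.run creates the event loop, drives the coroutine to
    # completion, and closes the loop: the new entry-point idiom.
    return asyncio.run(loop.run(step_n=step_n))

print(main(step_n=3))  # ran with step_n=3
```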
3 changes: 2 additions & 1 deletion rdagent/app/data_science/loop.py
@@ -1,3 +1,4 @@
import asyncio
from pathlib import Path

import fire
@@ -66,7 +67,7 @@ def main(
if exp_gen_cls is not None:
kaggle_loop.exp_gen = import_class(exp_gen_cls)(kaggle_loop.exp_gen.scen)

kaggle_loop.run(step_n=step_n, loop_n=loop_n, all_duration=timeout)
asyncio.run(kaggle_loop.run(step_n=step_n, loop_n=loop_n, all_duration=timeout))


if __name__ == "__main__":
3 changes: 2 additions & 1 deletion rdagent/app/qlib_rd_loop/factor.py
@@ -2,6 +2,7 @@
Factor workflow with session control
"""

import asyncio
from typing import Any

import fire
@@ -40,7 +41,7 @@ def main(path=None, step_n=None, loop_n=None, all_duration=None, checkout=True):
model_loop = FactorRDLoop(FACTOR_PROP_SETTING)
else:
model_loop = FactorRDLoop.load(path, checkout=checkout)
model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration)
asyncio.run(model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration))


if __name__ == "__main__":
3 changes: 2 additions & 1 deletion rdagent/app/qlib_rd_loop/factor_from_report.py
@@ -1,3 +1,4 @@
import asyncio
import json
from pathlib import Path
from typing import Any, Dict, Tuple
@@ -162,7 +163,7 @@ def main(report_folder=None, path=None, step_n=None, loop_n=None, all_duration=N
else:
model_loop = FactorReportLoop(report_folder=report_folder)

model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration)
asyncio.run(model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration))


if __name__ == "__main__":
4 changes: 3 additions & 1 deletion rdagent/app/qlib_rd_loop/model.py
@@ -2,6 +2,8 @@
Model workflow with session control
"""

import asyncio

import fire

from rdagent.app.qlib_rd_loop.conf import MODEL_PROP_SETTING
@@ -28,7 +30,7 @@ def main(path=None, step_n=None, loop_n=None, all_duration=None, checkout=True):
model_loop = ModelRDLoop(MODEL_PROP_SETTING)
else:
model_loop = ModelRDLoop.load(path, checkout=checkout)
model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration)
asyncio.run(model_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration))


if __name__ == "__main__":
4 changes: 3 additions & 1 deletion rdagent/app/qlib_rd_loop/quant.py
@@ -2,6 +2,7 @@
Quant (Factor & Model) workflow with session control
"""

import asyncio
from typing import Any

import fire
@@ -130,7 +131,7 @@ def main(path=None, step_n=None, loop_n=None, all_duration=None, checkout=True):
quant_loop = QuantRDLoop(QUANT_PROP_SETTING)
else:
quant_loop = QuantRDLoop.load(path, checkout=checkout)
quant_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration)

asyncio.run(quant_loop.run(step_n=step_n, loop_n=loop_n, all_duration=all_duration))


if __name__ == "__main__":
4 changes: 2 additions & 2 deletions rdagent/components/coder/data_science/feature/prompts.yaml
@@ -46,10 +46,10 @@ feature_coder:
5. You should use the following cache decorator to cache the results of the function:
```python
from joblib import Memory
memory = Memory(location='/tmp/cache', verbose=0)
memory = Memory(location='./cache', verbose=0)
@memory.cache```
6. Coding tricks:
- If the input consists of a batch of file paths and you need to modify the file contents to complete your feature engineering task, you can accomplish your feature engineering task by modifying these files and creating new files in a subfolder within "/tmp/cache" (this path is persistent, otherwise you may lose your created file). Then the new file paths are returned.
- If the input consists of a batch of file paths and you need to modify the file contents to complete your feature engineering task, you can accomplish your feature engineering task by modifying these files and creating new files in a subfolder within "./cache" (this path is persistent, otherwise you may lose your created file). Then the new file paths are returned.

{% include "scenarios.data_science.share:guidelines.coding" %}

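For context, `joblib.Memory` persists each function result to disk keyed by the arguments, so repeated calls skip recomputation; the sketch below approximates that behaviour with only the standard library (a simplified stand-in, not joblib's actual implementation):

```python
import hashlib
import pickle
import tempfile
from pathlib import Path

# A stdlib-only approximation of what joblib.Memory(location=...).cache does:
# persist each result to disk keyed by the function name and arguments.
def disk_cache(location):
    root = Path(location)
    root.mkdir(parents=True, exist_ok=True)

    def decorator(fn):
        def wrapper(*args, **kwargs):
            key = hashlib.sha256(
                pickle.dumps((fn.__name__, args, sorted(kwargs.items())))
            ).hexdigest()
            path = root / key
            if path.exists():                       # cache hit: skip the body
                return pickle.loads(path.read_bytes())
            result = fn(*args, **kwargs)            # cache miss: compute...
            path.write_bytes(pickle.dumps(result))  # ...and persist
            return result
        return wrapper
    return decorator

calls = []

@disk_cache(tempfile.mkdtemp())
def square(x):
    calls.append(x)  # record real executions
    return x * x

print(square(4), square(4), len(calls))  # 16 16 1 -- the body ran only once
```

This also illustrates why the PR moves the location from `/tmp/cache` to `./cache`: a cache directory relative to the workspace survives workspace backups, while `/tmp` contents may not.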
2 changes: 1 addition & 1 deletion rdagent/components/coder/data_science/model/prompts.yaml
@@ -43,7 +43,7 @@ model_coder:
4. You should use the following cache decorator to cache the results of the function:
```python
from joblib import Memory
memory = Memory(location='/tmp/cache', verbose=0)
memory = Memory(location='./cache', verbose=0)
@memory.cache```
{% include "scenarios.data_science.share:guidelines.coding" %}

@@ -273,7 +273,7 @@ data_loader_coder:
3. You should use the following cache decorator to cache the results of the function:
```python
from joblib import Memory
memory = Memory(location='/tmp/cache', verbose=0)
memory = Memory(location='./cache', verbose=0)
@memory.cache```
{% include "scenarios.data_science.share:guidelines.coding" %}

19 changes: 19 additions & 0 deletions rdagent/core/conf.py
@@ -78,5 +78,24 @@ class RDAgentSettings(ExtendedBaseSettings):

initial_fator_library_size: int = 20

# parallel loop
step_semaphore: int | dict[str, int] = 1
"""the semaphore for each step; you can specify a overall semaphore
or a step-wise semaphore like {"coding": 3, "running": 2}"""

def get_max_parallel(self) -> int:
"""Based on the setting of semaphore, return the maximum number of parallel loops"""
if isinstance(self.step_semaphore, int):
return self.step_semaphore
else:
return max(self.step_semaphore.values())

# NOTE: for debug
# the following flag only serves debugging and is not necessary in the main logic.
subproc_step: bool = False

def is_force_subproc(self) -> bool:
return self.subproc_step or self.get_max_parallel() > 1


RD_AGENT_SETTINGS = RDAgentSettings()
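The `step_semaphore` setting above caps how many loops may occupy a step at once. A sketch of how a per-step configuration could translate into `asyncio.Semaphore` guards (the helper names are hypothetical, not the actual `LoopBase` internals):

```python
import asyncio

def build_step_semaphores(step_semaphore, steps):
    # Mirror of get_max_parallel's two accepted shapes: a single int
    # applied to every step, or a per-step dict like {"coding": 3}.
    if isinstance(step_semaphore, int):
        return {s: asyncio.Semaphore(step_semaphore) for s in steps}
    return {s: asyncio.Semaphore(step_semaphore.get(s, 1)) for s in steps}

async def run_step(name, semaphores, active, peak):
    async with semaphores[name]:   # at most N loops inside this step at once
        active[name] += 1
        peak[name] = max(peak[name], active[name])
        await asyncio.sleep(0.01)  # stand-in for real step work
        active[name] -= 1

async def main():
    steps = ["coding", "running"]
    sems = build_step_semaphores({"coding": 3, "running": 2}, steps)
    active = {s: 0 for s in steps}
    peak = {s: 0 for s in steps}
    await asyncio.gather(*(run_step("coding", sems, active, peak) for _ in range(6)))
    await asyncio.gather(*(run_step("running", sems, active, peak) for _ in range(6)))
    return peak

print(asyncio.run(main()))  # {'coding': 3, 'running': 2}
```

Six tasks contend for each step, but the observed peak concurrency never exceeds the configured cap.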
18 changes: 17 additions & 1 deletion rdagent/core/proposal.py
@@ -2,14 +2,19 @@

from __future__ import annotations

import asyncio
from abc import ABC, abstractmethod
from typing import Generic, List, Tuple, TypeVar
from typing import TYPE_CHECKING, Generic, List, Tuple, TypeVar

from rdagent.core.conf import RD_AGENT_SETTINGS
from rdagent.core.evaluation import Feedback
from rdagent.core.experiment import ASpecificExp, Experiment
from rdagent.core.knowledge_base import KnowledgeBase
from rdagent.core.scenario import Scenario

if TYPE_CHECKING:
from rdagent.utils.workflow.loop import LoopBase


class Hypothesis:
"""
@@ -248,6 +253,17 @@ def gen(self, trace: Trace) -> Experiment:
)
"""

async def async_gen(self, trace: Trace, loop: LoopBase) -> Experiment:
"""
generate the experiment and decide whether to stop yield generation and give up control to other routines.
"""
# we give a default implementation here.
# The proposal is set to try best to generate the experiment in max-parallel level.
while True:
if loop.get_unfinished_loop_cnt(loop.loop_idx) < RD_AGENT_SETTINGS.get_max_parallel():
return self.gen(trace)
await asyncio.sleep(1)
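The default `async_gen` above is effectively a back-pressure valve: it polls once per second until the number of in-flight loops drops below the configured parallelism, then generates synchronously. A runnable sketch of that pattern against a stub loop (all names here are illustrative):

```python
import asyncio

MAX_PARALLEL = 2

class StubLoop:
    def __init__(self):
        self.unfinished = 3  # pretend 3 loops are already in flight

    def get_unfinished_loop_cnt(self):
        return self.unfinished

async def async_gen(loop):
    # Same shape as the default implementation: poll until there is
    # capacity, then hand back a freshly generated experiment.
    while True:
        if loop.get_unfinished_loop_cnt() < MAX_PARALLEL:
            return "experiment"
        await asyncio.sleep(0.01)  # yield so other coroutines can finish loops

async def finisher(loop):
    # Another routine finishing loops while async_gen waits.
    while loop.unfinished >= MAX_PARALLEL:
        await asyncio.sleep(0.02)
        loop.unfinished -= 1

async def main():
    loop = StubLoop()
    exp, _ = await asyncio.gather(async_gen(loop), finisher(loop))
    return exp, loop.unfinished

print(asyncio.run(main()))  # ('experiment', 1)
```

Without the `await asyncio.sleep` inside the polling loop, `async_gen` would monopolize the event loop and the other routines could never make progress.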


class HypothesisGen(ABC):

3 changes: 3 additions & 0 deletions rdagent/log/conf.py
@@ -12,6 +12,9 @@ class LogSettings(ExtendedBaseSettings):

trace_path: str = str(Path.cwd() / "log" / datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f"))

format_console: str | None = None
""""If it is None, leave it as the default"""

ui_server_port: int | None = None

storages: dict[str, list[int | str]] = {}
41 changes: 25 additions & 16 deletions rdagent/log/logger.py
@@ -1,17 +1,24 @@
import os
import sys
from contextlib import contextmanager
from contextvars import ContextVar
from datetime import datetime
from pathlib import Path
from typing import Generator

from loguru import logger

from .conf import LOG_SETTINGS

if LOG_SETTINGS.format_console is not None:
logger.remove()
logger.add(sys.stdout, format=LOG_SETTINGS.format_console)

from psutil import Process

from rdagent.core.utils import SingletonBaseClass, import_class

from .base import Storage
from .conf import LOG_SETTINGS
from .storage import FileStorage
from .utils import get_caller_info

@@ -39,14 +46,16 @@ class RDAgentLog(SingletonBaseClass):

"""

# TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &)
# Solution: Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger
# PipeLogger.info(...) , PipeLogger.get_resp() to get feedback from frontend.
# def f():
# logger = PipeLog()
# logger.info("<code>")
# feedback = logger.get_reps()
_tag: str = ""
# Thread-/coroutine-local tag; in a Linux forked subprocess it is copied to the child process.
_tag_ctx: ContextVar[str] = ContextVar("_tag_ctx", default="")

@property
def _tag(self) -> str: # Get current tag
return self._tag_ctx.get()

@_tag.setter # Set current tag
def _tag(self, value: str) -> None:
self._tag_ctx.set(value)

def __init__(self) -> None:
self.storage = FileStorage(LOG_SETTINGS.trace_path)
@@ -61,15 +70,16 @@ def __init__(self) -> None:
def tag(self, tag: str) -> Generator[None, None, None]:
if tag.strip() == "":
raise ValueError("Tag cannot be empty.")
if self._tag != "":
tag = "." + tag

# TODO: It may result in error in multithreading or co-routine
self._tag = self._tag + tag
# Generate a new complete tag
current_tag = self._tag_ctx.get()
new_tag = tag if current_tag == "" else f"{current_tag}.{tag}"
# Set and save token for later restore
token = self._tag_ctx.set(new_tag)
try:
yield
finally:
self._tag = self._tag[: -len(tag)]
# Restore previous tag (thread/coroutine safe)
self._tag_ctx.reset(token)

def set_storages_path(self, path: str | Path) -> None:
for storage in [self.storage] + self.other_storages:
@@ -96,7 +106,6 @@ def get_pids(self) -> str:
return pid_chain

def log_object(self, obj: object, *, tag: str = "") -> None:
# TODO: I think we can merge the log_object function with other normal log methods to make the interface simpler.
caller_info = get_caller_info()
tag = f"{self._tag}.{tag}.{self.get_pids()}".strip(".")

13 changes: 13 additions & 0 deletions rdagent/oai/backend/litellm.py
@@ -1,7 +1,9 @@
import copyreg
from typing import Any, Literal, cast

import numpy as np
from litellm import (
BadRequestError,
completion,
completion_cost,
embedding,
@@ -15,6 +17,17 @@
from rdagent.oai.llm_conf import LLMSettings


# NOTE: Patching! Otherwise, unpickling the exception calls the constructor and fails with:
# `BadRequestError.__init__() missing 2 required positional arguments: 'model' and 'llm_provider'`
def _reduce_no_init(exc: Exception) -> tuple:
cls = exc.__class__
return (cls.__new__, (cls,), exc.__dict__)


# register the reducer so BadRequestError instances survive pickling
copyreg.pickle(BadRequestError, _reduce_no_init)
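The reducer above sidesteps `__init__` entirely: `copyreg.pickle` tells pickle to rebuild the instance via `cls.__new__` and then restore `__dict__`, so constructors with required positional arguments never run during unpickling. The same trick applied to a self-contained stand-in exception (note that only the instance dict is restored, not the `args` tuple set by `Exception.__init__`):

```python
import copyreg
import pickle

class RequiresArgsError(Exception):
    # Stand-in for BadRequestError: __init__ demands arguments,
    # which breaks default exception unpickling.
    def __init__(self, model: str, llm_provider: str):
        super().__init__(f"{model}/{llm_provider}")
        self.model = model
        self.llm_provider = llm_provider

def _reduce_no_init(exc: Exception) -> tuple:
    cls = exc.__class__
    # Rebuild via __new__ (skipping __init__) and restore the instance dict.
    return (cls.__new__, (cls,), exc.__dict__)

copyreg.pickle(RequiresArgsError, _reduce_no_init)

err = RequiresArgsError("gpt-x", "azure")
restored = pickle.loads(pickle.dumps(err))
print(type(restored).__name__, restored.model, restored.llm_provider)
```

This matters for the PR because exceptions raised inside joblib subprocesses must cross a process boundary via pickling to reach the parent loop.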


class LiteLLMSettings(LLMSettings):

class Config: