feat: add naive experiment generator and update proposal configurations (#759)

you-n-g · web-flow · commit 75494f4fed5b · 2025-04-07T13:01:29.000+08:00
* feat: Add naive experiment generator and update proposal configurations

* lint

* lint
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
@@ -24,8 +24,10 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     #### enable specification
     spec_enabled: bool = True
 
+    ### proposal related
     proposal_version: str = "v1"
     coder_on_whole_pipeline: bool = False
+    max_trace_hist: int = 3
 
     coder_max_loop: int = 10
     runner_max_loop: int = 3
diff --git a/rdagent/components/coder/data_science/pipeline/exp.py b/rdagent/components/coder/data_science/pipeline/exp.py
@@ -3,4 +3,5 @@
 
 # Because we use isinstance to distinguish between different types of tasks, we need to use sub classes to represent different types of tasks
 class PipelineTask(CoSTEERTask):
-    pass
+    def __init__(self, name: str = "Pipeline", *args, **kwargs) -> None:
+        super().__init__(name=name, *args, **kwargs)
diff --git a/rdagent/components/coder/data_science/workflow/exp.py b/rdagent/components/coder/data_science/workflow/exp.py
@@ -10,4 +10,5 @@
 
 # Because we use isinstance to distinguish between different types of tasks, we need to use sub classes to represent different types of tasks
 class WorkflowTask(CoSTEERTask):
-    pass
+    def __init__(self, name: str = "Workflow", *args, **kwargs) -> None:
+        super().__init__(name=name, *args, **kwargs)
diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
@@ -39,7 +39,7 @@ def base_iter(settings_cls: type[ExtendedBaseSettings]) -> list[type[ExtendedBas
                 env_prefix=base_cls.model_config.get("env_prefix"),
                 env_nested_delimiter=base_cls.model_config.get("env_nested_delimiter"),
             )
-            for base_cls in base_iter(cast(type[ExtendedBaseSettings], settings_cls))
+            for base_cls in base_iter(cast("type[ExtendedBaseSettings]", settings_cls))
         ]
         return init_settings, env_settings, *parent_env_settings, dotenv_settings, file_secret_settings
 
diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
@@ -10,7 +10,7 @@
 from collections.abc import Sequence
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Generic, Literal, TypeVar
+from typing import Any, Generic, TypeVar
 
 from rdagent.core.conf import RD_AGENT_SETTINGS
 from rdagent.core.evaluation import Feedback
diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py
@@ -69,7 +69,7 @@ def similarity(text1: str, text2: str) -> int:
     text2 = text2 if isinstance(text2, str) else ""
 
     # Maybe we can use other similarity algorithm such as tfidf
-    return cast(int, fuzz.ratio(text1, text2))  # mypy does not regard it as int
+    return cast("int", fuzz.ratio(text1, text2))  # mypy does not regard it as int
 
 
 def import_class(class_path: str) -> Any:
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/__init__.py b/rdagent/scenarios/data_science/proposal/exp_gen/__init__.py
@@ -1,5 +1,6 @@
 from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.core.proposal import ExpGen
+from rdagent.core.utils import import_class
 from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
 from rdagent.scenarios.data_science.proposal.exp_gen.base import DSTrace
 from rdagent.scenarios.data_science.proposal.exp_gen.draft import DSDraftExpGen
@@ -11,32 +12,29 @@
 
 
 class DSExpGen(ExpGen):
-    """Data Science Task Generator."""
+    """
+    Data Science Task Generator.
+    This is an experiment router generator: it dispatches to a concrete generator based on the settings and the trace.
+    """
 
-    def __init__(self, scen: DataScienceScen, max_trace_hist: int = 3) -> None:
-        self.max_trace_hist = max_trace_hist  # max number of historical trace to know when propose new experiment
+    def __init__(self, scen: DataScienceScen) -> None:
         super().__init__(scen)
 
     def gen(self, trace: DSTrace) -> DSExperiment:
+
+        if DS_RD_SETTING.proposal_version not in ["v1", "v2"]:
+            return import_class(DS_RD_SETTING.proposal_version)(scen=self.scen).gen(trace=trace)
+
         if DS_RD_SETTING.coder_on_whole_pipeline:
-            return DSProposalV2ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-                pipeline=True,
-            )
+            return DSProposalV2ExpGen(scen=self.scen).gen(trace=trace, pipeline=True)
+
         next_missing_component = trace.next_incomplete_component()
         if next_missing_component is not None:
             return DSDraftExpGen(scen=self.scen).gen(
                 component=next_missing_component,
                 trace=trace,
             )
         if DS_RD_SETTING.proposal_version == "v1":
-            return DSProposalV1ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-            )
+            return DSProposalV1ExpGen(scen=self.scen).gen(trace=trace)
         if DS_RD_SETTING.proposal_version == "v2":
-            return DSProposalV2ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-            )
+            return DSProposalV2ExpGen(scen=self.scen).gen(trace=trace)
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/naive.py b/rdagent/scenarios/data_science/proposal/exp_gen/naive.py
@@ -0,0 +1,64 @@
+"""
+The most naive way to design experiments
+"""
+
+from rdagent.app.data_science.conf import DS_RD_SETTING
+from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
+from rdagent.core.proposal import ExpGen
+from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
+from rdagent.scenarios.data_science.proposal.exp_gen.base import DSHypothesis, DSTrace
+from rdagent.utils.agent.tpl import T
+from rdagent.utils.agent.workflow import build_cls_from_json_with_retry
+
+
+class NaiveExpGen(ExpGen):
+    def gen(self, trace: DSTrace) -> DSExperiment:
+        sota_exp = trace.sota_experiment()
+        scenario_desc = trace.scen.get_scenario_all_desc()
+        competition_desc = trace.scen.get_competition_full_desc()
+        sota_exp_desc = T("scenarios.data_science.share:describe.exp").r(
+            exp=sota_exp, heading="Best of previous exploration of the scenario"
+        )
+
+        sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
+        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
+            -DS_RD_SETTING.max_trace_hist :
+        ]
+
+        sota_exp_and_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
+            exp_and_feedback_list=sota_exp_feedback_list,
+            success=True,
+        )
+        failed_exp_and_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
+            exp_and_feedback_list=failed_exp_feedback_list,
+            success=False,
+        )
+
+        sys_prompt = T(".naive:naive_gen.system").r()
+
+        user_prompt = T(".naive:naive_gen.user").r(
+            competition_desc=competition_desc,
+            sota_exp_desc=sota_exp_desc,
+            scenario_desc=scenario_desc,
+            sota_exp_and_feedback_list_desc=sota_exp_and_feedback_list_desc,
+            failed_exp_and_feedback_list_desc=failed_exp_and_feedback_list_desc,
+        )
+
+        task = build_cls_from_json_with_retry(
+            cls=PipelineTask,
+            system_prompt=sys_prompt,
+            user_prompt=user_prompt,
+            retry_n=5,
+        )
+
+        exp = DSExperiment(
+            pending_tasks_list=[[task]],
+            hypothesis=DSHypothesis(
+                component="Pipeline",
+                hypothesis=task.description,
+            ),
+        )
+
+        if sota_exp is not None:
+            exp.experiment_workspace.inject_code_from_file_dict(sota_exp.experiment_workspace)
+        return exp
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/naive.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/naive.yaml
@@ -0,0 +1,37 @@
+naive_gen:
+  system: |-
+    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
+    You will be given a competition scenario, previous SOTA(best) and failed experiments and feedbacks, the current SOTA implementation and feedback, and a list of identified problems.
+
+    ## Guidelines
+    Here are guidelines to aid your task design. You don't need to answer all the questions.
+    1. Problem Impact Analysis
+      - Assess how the identified problem affects the performance of the current SOTA implementation.
+    2. Lessons from Previous Experiments
+      - For persistent problem, analyze why previous experiments failed on this problem.
+      - Review why previous experiments failed to address the problem. Identify patterns, overlooked factors, or misaligned assumptions.
+      - Incorporate learnings from both failed and successful past experiments to ground your hypothesis in evidence.
+    3. Actionable Changes
+      - If the problem relates to time/memory constraints, suggest smaller model sizes or alternative algorithms with reduced complexity.
+      - If the problem involves underperforming models, propose removing or replacing models with significantly worse performance.
+      - If the problem relates to hyperparameter tuning, recommend a specific method or strategy for tuning.
+
+    ## Final Output Format in JSON Schema:
+    {% include "scenarios.data_science.proposal.exp_gen.prompts:output_format.pipeline" %}
+
+  user: |-
+    # Scenario Description
+    {{ scenario_desc }}
+
+    # Competition Description
+    {{ competition_desc }}
+
+    # Previous Failed Experiments and Feedbacks:
+    {{ failed_exp_and_feedback_list_desc }}
+
+    # Previous SOTA Experiments and Feedbacks:
+    {{ sota_exp_and_feedback_list_desc }}
+
+    # Current SOTA Implementation
+    {{ sota_exp_desc }}
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py b/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
@@ -58,7 +58,7 @@
 
 
 class DSProposalV1ExpGen(ExpGen):
-    def gen(self, trace: DSTrace, max_trace_hist: int) -> DSExperiment:
+    def gen(self, trace: DSTrace) -> DSExperiment:
         # Guidelines:
         # System prompts: Shared condition you are facing
         # - scenario description: `scenario_desc`
@@ -84,7 +84,9 @@ def gen(self, trace: DSTrace, max_trace_hist: int) -> DSExperiment:
         )  # we use file_dict for hitting the cache when replicate the experiment in another machine.
 
         sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
-        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[-max_trace_hist:]
+        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
+            -DS_RD_SETTING.max_trace_hist :
+        ]
         all_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="all")
         trace_component_to_feedback_df = pd.DataFrame(columns=["component", "hypothesis", "decision"])
         for index, (exp, fb) in enumerate(all_exp_feedback_list):
@@ -414,7 +416,7 @@ def task_gen(
             exp.pending_tasks_list.append([workflow_task])
         return exp
 
-    def gen(self, trace: DSTrace, max_trace_hist: int, pipeline: bool = False) -> DSExperiment:
+    def gen(self, trace: DSTrace, pipeline: bool = False) -> DSExperiment:
         component_desc = "\n".join(
             [
                 f"[{key}] {value}"
@@ -431,7 +433,9 @@ def gen(self, trace: DSTrace, max_trace_hist: int, pipeline: bool = False) -> DS
         )
 
         sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
-        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[-max_trace_hist:]
+        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
+            -DS_RD_SETTING.max_trace_hist :
+        ]
 
         sota_exp_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
             exp_and_feedback_list=sota_exp_feedback_list,
diff --git a/rdagent/scenarios/data_science/share.yaml b/rdagent/scenarios/data_science/share.yaml
@@ -286,4 +286,4 @@ component_spec:
 
     8. Submission File:
       - Save the final predictions as `submission.csv`, ensuring the format matches the competition requirements (refer to `sample_submission` in the Folder Description for the correct structure).
-      - Present the required submission format explicitly and ensure the output adheres to it.
+      - Present the required submission format explicitly and ensure the output adheres to it.

Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ def base_iter(settings_cls: type[ExtendedBaseSettings]) -> list[type[ExtendedBas`
`39`	`39`	`env_prefix=base_cls.model_config.get("env_prefix"),`
`40`	`40`	`env_nested_delimiter=base_cls.model_config.get("env_nested_delimiter"),`
`41`	`41`	`)`
`42`		`- for base_cls in base_iter(cast(type[ExtendedBaseSettings], settings_cls))`
	`42`	`+ for base_cls in base_iter(cast("type[ExtendedBaseSettings]", settings_cls))`
`43`	`43`	`]`
`44`	`44`	`return init_settings, env_settings, *parent_env_settings, dotenv_settings, file_secret_settings`
`45`	`45`