|
1 | 1 | import json
|
2 |
| -from typing import TYPE_CHECKING |
| 2 | +from typing import TYPE_CHECKING, Dict |
3 | 3 |
|
4 | 4 | from rdagent.app.data_science.conf import DS_RD_SETTING
|
5 | 5 | from rdagent.components.coder.data_science.ensemble.exp import EnsembleTask
|
6 | 6 | from rdagent.components.coder.data_science.feature.exp import FeatureTask
|
7 | 7 | from rdagent.components.coder.data_science.model.exp import ModelTask
|
| 8 | +from rdagent.components.coder.data_science.pipeline.exp import PipelineTask |
8 | 9 | from rdagent.components.coder.data_science.raw_data_loader.exp import DataLoaderTask
|
9 | 10 | from rdagent.components.coder.data_science.workflow.exp import WorkflowTask
|
10 | 11 | from rdagent.core.proposal import ExpGen, Hypothesis
|
@@ -116,3 +117,77 @@ def gen(
|
116 | 117 | # exp.experiment_workspace.inject_code_from_folder(last_successful_exp.experiment_workspace.workspace_path)
|
117 | 118 | exp.experiment_workspace.inject_code_from_file_dict(last_successful_exp.experiment_workspace)
|
118 | 119 | return exp
|
| 120 | + |
| 121 | + |
class DSDraftV2ExpGen(ExpGen):
    """Draft an experiment in two LLM steps: identify scenario problems, then design one pipeline task."""

    def task_gen(
        self,
        scenario_desc: str,
        scen_problems: dict,
        component_desc: str,
        drafting_trace_desc: str,
    ) -> DSExperiment:
        """Ask the LLM for a task design and wrap it in a single-task experiment.

        Args:
            scenario_desc: Scenario description rendered into the user prompt.
            scen_problems: Mapping of problem name to a dict that must contain a
                ``"problem"`` key holding the problem description.
            component_desc: Component description rendered into the system prompt.
            drafting_trace_desc: Description of earlier drafting attempts rendered
                into the user prompt.

        Returns:
            A ``DSExperiment`` whose pending task list holds one ``PipelineTask``
            and whose hypothesis is a placeholder.

        Raises:
            KeyError: If an entry of ``scen_problems`` has no ``"problem"`` key.
        """
        # Render every identified problem as a small markdown section.
        scen_problems_text = "".join(
            f"## Problem Name: {problem_name}\n" f"- Problem Description: {problem_dict['problem']}\n\n"
            for problem_name, problem_dict in scen_problems.items()
        )

        sys_prompt = T(".prompts_drafting:task_draft.system").r(
            task_spec=T("scenarios.data_science.share:component_spec.Pipeline").r(),
            component_desc=component_desc,
        )
        user_prompt = T(".prompts_drafting:task_draft.user").r(
            scenario_desc=scenario_desc,
            scen_problems=scen_problems_text,
            drafting_trace_desc=drafting_trace_desc,
        )
        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt,
            system_prompt=sys_prompt,
            json_mode=True,
            json_target_type=Dict[str, str],
        )
        task_dict = json.loads(response)
        # Fall back to a fixed description when the reply has no "task_design" key.
        task_design = task_dict.get("task_design", "Description not provided")
        task = PipelineTask(name="Workflow", description=task_design)

        # A placeholder hypothesis is used here: drafting has no real hypothesis yet.
        # The component is "Pipeline" because the drafted task is a PipelineTask built
        # from the Pipeline component spec (the original referenced an undefined name).
        pseudo_hypothesis = DSHypothesis(
            component="Pipeline",
            hypothesis="This is a pesudo hypothesis for drafting the first competition implementation. Your result should not be influenced by this hypothesis.",
            problem_name="This is a pesudo problem name for drafting. The corresponding problem description includes several problem together.",
            problem_desc=scen_problems_text,
        )
        return DSExperiment(pending_tasks_list=[[task]], hypothesis=pseudo_hypothesis)

    def gen(self, trace: DSTrace) -> DSExperiment:
        """Generate a drafted experiment from the given trace.

        Args:
            trace: Trace providing the scenario, the last experiment and the
                experiment/feedback history.

        Returns:
            The ``DSExperiment`` produced by :meth:`task_gen`.
        """
        # Prepare: reuse the EDA report of the last experiment when one exists.
        last_exp = trace.last_exp()
        if isinstance(last_exp, DSExperiment):
            eda_output = last_exp.experiment_workspace.file_dict.get("EDA.md", None)
        else:
            eda_output = None

        component_desc = T("scenarios.data_science.share:component_description_in_pipeline").r()
        scenario_desc = trace.scen.get_scenario_all_desc(eda_output=eda_output)
        drafting_trace_desc = T("scenarios.data_science.share:describe.drafting_trace").r(
            exp_and_feedback_list=trace.experiment_and_feedback_list_after_init(return_type="all"),
        )

        # Step 1: Identify Scenario Problems
        sys_prompt = T(".prompts_drafting:scenario_problem.system").r()
        user_prompt = T(".prompts_drafting:scenario_problem.user").r(scenario_desc=scenario_desc)
        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt,
            system_prompt=sys_prompt,
            json_mode=True,
            json_target_type=Dict[str, Dict[str, str]],
        )
        scen_problems = json.loads(response)

        # Step 2: Design Task
        # component_desc is a required parameter of task_gen; the original call omitted it.
        return self.task_gen(
            scenario_desc=scenario_desc,
            scen_problems=scen_problems,
            component_desc=component_desc,
            drafting_trace_desc=drafting_trace_desc,
        )
0 commit comments