Skip to content

Commit be3ee9d

Browse files
authored
fix: duplicate model names test in pipeline coder & runner (#763)
* add model name duplicate test in pipeline costeer
* fix ci
1 parent 68b5018 commit be3ee9d

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

rdagent/components/coder/data_science/pipeline/eval.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,14 @@ def evaluate(
7171
model_set_in_scores = set(score_df.index)
7272

7373
# Check model names (index)
74+
if not score_df.index.is_unique:
75+
score_check_text += "\n[Error] The score dataframe contains duplicate model names."
76+
score_ret_code = 1
7477
if "ensemble" not in model_set_in_scores:
75-
score_check_text += (
76-
f"\n[Error] The score dataframe doesn't contain the ensemble model.\nscore_df is:\n{score_df}"
77-
)
78+
score_check_text += "\n[Error] The score dataframe doesn't contain the ensemble model."
7879
score_ret_code = 1
80+
if score_ret_code != 0:
81+
score_check_text += f"The score_df is:\n{score_df}"
7982

8083
# Check metric name (columns)
8184
if score_df.columns.tolist() != [self.scen.metric_name]:

rdagent/log/ui/ds_trace.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,8 @@ def summarize_data():
435435
df.loc[loop, "End Time (UTC+8)"] = state.times[loop][-1].end + timedelta(hours=8)
436436
if "running" in loop_data and "no_tag" in loop_data["running"]:
437437
try:
438-
df.loc[loop, "Running Score (valid)"] = round(
439-
loop_data["running"]["no_tag"].result.loc["ensemble"].iloc[0], 5
438+
df.loc[loop, "Running Score (valid)"] = str(
439+
round(loop_data["running"]["no_tag"].result.loc["ensemble"].iloc[0], 5)
440440
)
441441
except:
442442
df.loc[loop, "Running Score (valid)"] = "❌"

rdagent/scenarios/data_science/dev/runner/eval.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,14 @@ def evaluate(
6767
# Check model names (index)
6868
# in Pipeline task, we only check ensemble in scores.csv
6969
if DS_RD_SETTING.coder_on_whole_pipeline:
70+
if not score_df.index.is_unique:
71+
score_check_text += "\n[Error] The score dataframe contains duplicate model names."
72+
score_ret_code = 1
7073
if "ensemble" not in model_set_in_scores:
71-
score_check_text += f"\n[Error] The score dataframe doesn't contain the ensemble model.\nscore_df is:\n{score_df}"
74+
score_check_text += "\n[Error] The score dataframe doesn't contain the ensemble model."
7275
score_ret_code = 1
76+
if score_ret_code != 0:
77+
score_check_text += f"The score_df is:\n{score_df}"
7378
else:
7479
if model_set_in_scores != model_set_in_folder.union({"ensemble"}):
7580
score_check_text += f"\n[Error] The scores dataframe does not contain the correct model names as index.\ncorrect model names are: {model_set_in_folder.union({'ensemble'})}\nscore_df is:\n{score_df}"

0 commit comments

Comments
 (0)