feat: add competition level filter and extract constants to utils (#869)

you-n-g · web-flow · commit b40b6055368e · 2025-05-12T21:56:57.000+08:00
* feat: add competition level filter and extract constants to utils

* lint
diff --git a/rdagent/log/ui/ds_summary.py b/rdagent/log/ui/ds_summary.py
@@ -14,6 +14,7 @@
 from rdagent.log.mle_summary import extract_mle_json
 from rdagent.log.ui.conf import UI_SETTING
 from rdagent.log.ui.ds_trace import load_times
+from rdagent.log.ui.utils import ALL, HIGH, LITE, MEDIUM
 from rdagent.scenarios.kaggle.kaggle_crawler import leaderboard_scores
 
 
@@ -352,32 +353,6 @@ def mean_func(x: pd.DataFrame):
         st.dataframe(df)
 
 
-LITE = [
-    "aerial-cactus-identification",
-    "aptos2019-blindness-detection",
-    "denoising-dirty-documents",
-    "detecting-insults-in-social-commentary",
-    "dog-breed-identification",
-    "dogs-vs-cats-redux-kernels-edition",
-    "histopathologic-cancer-detection",
-    "jigsaw-toxic-comment-classification-challenge",
-    "leaf-classification",
-    "mlsp-2013-birds",
-    "new-york-city-taxi-fare-prediction",
-    "nomad2018-predict-transparent-conductors",
-    "plant-pathology-2020-fgvc7",
-    "random-acts-of-pizza",
-    "ranzcr-clip-catheter-line-classification",
-    "siim-isic-melanoma-classification",
-    "spooky-author-identification",
-    "tabular-playground-series-dec-2021",
-    "tabular-playground-series-may-2022",
-    "text-normalization-challenge-english-language",
-    "text-normalization-challenge-russian-language",
-    "the-icml-2013-whale-challenge-right-whale-redux",
-]
-
-
 def all_summarize_win():
     def shorten_folder_name(folder: str) -> str:
         if "amlt" in folder:
@@ -401,8 +376,24 @@ def shorten_folder_name(folder: str) -> str:
     base_df = percent_df(base_df)
     base_df.insert(0, "Select", True)
     bt1, bt2 = st.columns(2)
-    if bt2.toggle("Select Lite Competitions", key="select_lite"):
-        base_df["Select"] = base_df["Competition"].isin(LITE)
+    select_lite_level = bt2.selectbox(
+        "Select MLE-Bench Competitions Level",
+        options=["ALL", "HIGH", "MEDIUM", "LITE"],
+        index=0,
+        key="select_lite_level",
+    )
+    if select_lite_level != "ALL":
+        if select_lite_level == "HIGH":
+            lite_set = set(HIGH)
+        elif select_lite_level == "MEDIUM":
+            lite_set = set(MEDIUM)
+        elif select_lite_level == "LITE":
+            lite_set = set(LITE)
+        else:
+            lite_set = set()
+        base_df["Select"] = base_df["Competition"].isin(lite_set)
+    else:
+        base_df["Select"] = True  # select all if ALL is chosen
 
     if bt1.toggle("Select Best", key="select_best"):
 
diff --git a/rdagent/log/ui/utils.py b/rdagent/log/ui/utils.py
@@ -0,0 +1,85 @@
+LITE = [  # competition names selected by the "LITE" option of the MLE-Bench level filter
+    "aerial-cactus-identification",
+    "aptos2019-blindness-detection",
+    "denoising-dirty-documents",
+    "detecting-insults-in-social-commentary",
+    "dog-breed-identification",
+    "dogs-vs-cats-redux-kernels-edition",
+    "histopathologic-cancer-detection",
+    "jigsaw-toxic-comment-classification-challenge",
+    "leaf-classification",
+    "mlsp-2013-birds",
+    "new-york-city-taxi-fare-prediction",
+    "nomad2018-predict-transparent-conductors",
+    "plant-pathology-2020-fgvc7",
+    "random-acts-of-pizza",
+    "ranzcr-clip-catheter-line-classification",
+    "siim-isic-melanoma-classification",
+    "spooky-author-identification",
+    "tabular-playground-series-dec-2021",
+    "tabular-playground-series-may-2022",
+    "text-normalization-challenge-english-language",
+    "text-normalization-challenge-russian-language",
+    "the-icml-2013-whale-challenge-right-whale-redux",
+]
+
+HIGH = [  # competition names selected by the "HIGH" option of the MLE-Bench level filter
+    "3d-object-detection-for-autonomous-vehicles",
+    "bms-molecular-translation",
+    "google-research-identify-contrails-reduce-global-warming",
+    "hms-harmful-brain-activity-classification",
+    "iwildcam-2019-fgvc6",
+    "nfl-player-contact-detection",
+    "predict-volcanic-eruptions-ingv-oe",
+    "rsna-2022-cervical-spine-fracture-detection",
+    "rsna-breast-cancer-detection",
+    "rsna-miccai-brain-tumor-radiogenomic-classification",
+    "siim-covid19-detection",
+    "smartphone-decimeter-2022",
+    "stanford-covid-vaccine",
+    "vesuvius-challenge-ink-detection",
+    "vinbigdata-chest-xray-abnormalities-detection",
+]
+
+MEDIUM = [  # competition names selected by the "MEDIUM" option of the MLE-Bench level filter
+    "AI4Code",
+    "alaska2-image-steganalysis",
+    "billion-word-imputation",
+    "cassava-leaf-disease-classification",
+    "cdiscount-image-classification-challenge",
+    "chaii-hindi-and-tamil-question-answering",
+    "champs-scalar-coupling",
+    "facebook-recruiting-iii-keyword-extraction",
+    "freesound-audio-tagging-2019",
+    "google-quest-challenge",
+    "h-and-m-personalized-fashion-recommendations",
+    "herbarium-2020-fgvc7",
+    "herbarium-2021-fgvc8",
+    "herbarium-2022-fgvc9",
+    "hotel-id-2021-fgvc8",
+    "hubmap-kidney-segmentation",
+    "icecube-neutrinos-in-deep-ice",
+    "imet-2020-fgvc7",
+    "inaturalist-2019-fgvc6",
+    "iwildcam-2020-fgvc7",
+    "jigsaw-unintended-bias-in-toxicity-classification",
+    "kuzushiji-recognition",
+    "learning-agency-lab-automated-essay-scoring-2",
+    "lmsys-chatbot-arena",
+    "multi-modal-gesture-recognition",
+    "osic-pulmonary-fibrosis-progression",
+    "petfinder-pawpularity-score",
+    "plant-pathology-2021-fgvc8",
+    "seti-breakthrough-listen",
+    "statoil-iceberg-classifier-challenge",
+    "tensorflow-speech-recognition-challenge",
+    "tensorflow2-question-answering",
+    "tgs-salt-identification-challenge",
+    "tweet-sentiment-extraction",
+    "us-patent-phrase-to-phrase-matching",
+    "uw-madison-gi-tract-image-segmentation",
+    "ventilator-pressure-prediction",
+    "whale-categorization-playground",
+]
+
+ALL = HIGH + MEDIUM + LITE  # new list: every HIGH entry, then MEDIUM, then LITE (order preserved)