snowflakedb
diff --git a/docs/source/modin/groupby.rst b/docs/source/modin/groupby.rst
Lines changed: 1 addition & 1 deletion
diff --git a/src/snowflake/snowpark/modin/plugin/__init__.py b/src/snowflake/snowpark/modin/plugin/__init__.py
Lines changed: 29 additions & 24 deletions
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py b/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py
Lines changed: 53 additions & 41 deletions
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
Lines changed: 7 additions & 7 deletions
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py b/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py
Lines changed: 0 additions & 58 deletions
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py
Lines changed: 0 additions & 4 deletions
@@ -2,7 +2,7 @@
 GroupBy
 =============================
 
-.. currentmodule:: snowflake.snowpark.modin.plugin.extensions.groupby_overrides
+.. currentmodule:: modin.pandas.groupby
 .. rubric:: :doc:`All supported groupby APIs <supported/groupby_supported>`
 
 .. rubric:: Indexing, iteration
 
@@ -57,6 +57,8 @@
 import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides  # isort: skip  # noqa: E402,F401
 import snowflake.snowpark.modin.plugin.extensions.series_extensions  # isort: skip  # noqa: E402,F401
 import snowflake.snowpark.modin.plugin.extensions.series_overrides  # isort: skip  # noqa: E402,F401
+import snowflake.snowpark.modin.plugin.extensions.dataframe_groupby_overrides  # isort: skip  # noqa: E402,F401
+import snowflake.snowpark.modin.plugin.extensions.series_groupby_overrides  # isort: skip  # noqa: E402,F401
 
 # === INITIALIZE DOCSTRINGS ===
 # These imports also all need to occur after modin + pandas dependencies are validated.
@@ -72,8 +74,9 @@
 import modin.pandas.series_utils  # type: ignore[import]  # isort: skip  # noqa: E402
 
 # Hybrid Mode Imports
-from modin.core.storage_formats.pandas.query_compiler_caster import (
+from modin.core.storage_formats.pandas.query_compiler_caster import (  # isort: skip  # noqa: E402
     _GENERAL_EXTENSIONS,
+    _NON_EXTENDABLE_ATTRIBUTES,
     register_function_for_post_op_switch,
     register_function_for_pre_op_switch,
 )
@@ -182,18 +185,25 @@
     "cumsum",
 ]
 
-post_op_switch_points = [
-    {"class_name": None, "method": "read_snowflake"},
-    {"class_name": "Series", "method": "value_counts"},
-    {"class_name": "DataFrame", "method": "value_counts"},
-    # Series.agg can return a Series if a list of aggregations is provided
-    {"class_name": "Series", "method": "agg"},
-    {"class_name": "Series", "method": "aggregate"},
-] + [{"class_name": "DataFrame", "method": agg_method} for agg_method in aggregations] + [
-    {"class_name": "DataFrameGroupBy", "method": agg_method} for agg_method in aggregations
-] + [
-    {"class_name": "SeriesGroupBy", "method": agg_method} for agg_method in aggregations
-]
+post_op_switch_points = (
+    [
+        {"class_name": None, "method": "read_snowflake"},
+        {"class_name": "Series", "method": "value_counts"},
+        {"class_name": "DataFrame", "method": "value_counts"},
+        # Series.agg can return a Series if a list of aggregations is provided
+        {"class_name": "Series", "method": "agg"},
+        {"class_name": "Series", "method": "aggregate"},
+    ]
+    + [{"class_name": "DataFrame", "method": agg_method} for agg_method in aggregations]
+    + [
+        {"class_name": "DataFrameGroupBy", "method": agg_method}
+        for agg_method in aggregations
+    ]
+    + [
+        {"class_name": "SeriesGroupBy", "method": agg_method}
+        for agg_method in aggregations
+    ]
+)
 
 pre_op_points = []
 for point in pre_op_switch_points:
@@ -214,15 +224,14 @@
     )
 
 
-
 # Remove print statements for the customer validation release
-#print("#################### HYBRID MODE #################")
-#print(f"######## Registered Pre-Operation Methods ########\n{', '.join(pre_op_points)}")
-#print("##################################################")
-#print(
+# print("#################### HYBRID MODE #################")
+# print(f"######## Registered Pre-Operation Methods ########\n{', '.join(pre_op_points)}")
+# print("##################################################")
+# print(
 #    f"######## Registered_Post-Operation_Methods #######\n{', '.join(post_op_points)}"
-#)
-#print("##################################################\n")
+# )
+# print("##################################################\n")
 
 Backend.set_active_backends(["Snowflake", "Pandas"])
 
@@ -255,10 +264,6 @@
     register_base_accessor,
 )
 from modin.pandas.accessor import ModinAPI  # isort: skip  # noqa: E402,F401
-from modin.core.storage_formats.pandas.query_compiler_caster import (  # isort: skip  # noqa: E402,F401
-    _NON_EXTENDABLE_ATTRIBUTES,
-    _GENERAL_EXTENSIONS,
-)
 
 from snowflake.snowpark.modin.plugin._internal.telemetry import (  # isort: skip  # noqa: E402,F401
     TELEMETRY_PRIVATE_METHODS,
 
@@ -568,7 +568,6 @@ class TelemetryMeta(type):
     def __new__(
         cls, name: str, bases: tuple, attrs: dict[str, Any]
     ) -> Union[
-        "snowflake.snowpark.modin.plugin.extensions.groupby_overrides.DataFrameGroupBy",
         "snowflake.snowpark.modin.plugin.extensions.resample_overrides.Resampler",
         "snowflake.snowpark.modin.plugin.extensions.window_overrides.Window",
         "snowflake.snowpark.modin.plugin.extensions.window_overrides.Rolling",
@@ -581,7 +580,6 @@ def __new__(
         with ``snowpark_pandas_telemetry_api_usage`` telemetry decorator.
         Method arguments returned by _get_kwargs_telemetry are collected otherwise set telemetry_args=list().
         TelemetryMeta is only set as the metaclass of:
-         snowflake.snowpark.modin.plugin.extensions.groupby_overrides.DataFrameGroupBy,
          snowflake.snowpark.modin.plugin.extensions.resample_overrides.Resampler,
          snowflake.snowpark.modin.plugin.extensions.window_overrides.Window,
          snowflake.snowpark.modin.plugin.extensions.window_overrides.Rolling, and their subclasses.
@@ -593,7 +591,6 @@ def __new__(
             attrs (Dict[str, Any]): The attributes of the class.
 
         Returns:
-            Union[snowflake.snowpark.modin.plugin.extensions.groupby_overrides.DataFrameGroupBy,
                 snowflake.snowpark.modin.plugin.extensions.resample_overrides.Resampler,
                 snowflake.snowpark.modin.plugin.extensions.window_overrides.Window,
                 snowflake.snowpark.modin.plugin.extensions.window_overrides.Rolling]:
@@ -620,13 +617,13 @@ def snowpark_pandas_api_watcher(api_name: str, _time: Union[int, float]) -> None
     if len(tokens) >= 2 and tokens[0] == "pandas-api":
         modin_api_call_history.append(tokens[1])
 
-hybrid_switch_log = native_pd.DataFrame(
-    {}
-)
+
+hybrid_switch_log = native_pd.DataFrame({})
+
 
 @cached(cache={})
 def get_user_source_location(mode, group) -> str:
-    
+
     import inspect
 
     stack = inspect.stack()
@@ -644,33 +641,42 @@ def get_user_source_location(mode, group) -> str:
         and frame_before_snowpandas.code_context is not None
     ):
         location = frame_before_snowpandas.code_context[0].replace("\n", "")
-    return {'mode': mode, 'group': group, 'location': location }
+    return {"mode": mode, "group": group, "location": location}
+
 
 def get_hybrid_switch_log():
     global hybrid_switch_log
     return hybrid_switch_log.copy()
 
+
 def add_to_hybrid_switch_log(metrics: dict):
     global hybrid_switch_log
     try:
-        mode = metrics['mode']
-        source = get_user_source_location(mode, metrics['group'])['location']
+        mode = metrics["mode"]
+        source = get_user_source_location(mode, metrics["group"])["location"]
         if len(source) > 40:
             source = source[0:17] + "..." + source[-20:-1] + source[-1]
-        hybrid_switch_log = native_pd.concat([hybrid_switch_log, 
-                                                native_pd.DataFrame({'source': [source],
-                                                                   'mode': [metrics['mode']],
-                                                                   'group': [metrics['group']],
-                                                                   'metric': [metrics['metric']],
-                                                                   'submetric': [metrics['submetric'] or None],
-                                                                   'value': [metrics['value']],
-                                                                   'from': [metrics['from'] if 'from' in metrics else None],
-                                                                   'to': [metrics['to'] if 'to' in metrics else None],
-                                                                   })])
+        hybrid_switch_log = native_pd.concat(
+            [
+                hybrid_switch_log,
+                native_pd.DataFrame(
+                    {
+                        "source": [source],
+                        "mode": [metrics["mode"]],
+                        "group": [metrics["group"]],
+                        "metric": [metrics["metric"]],
+                        "submetric": [metrics["submetric"] or None],
+                        "value": [metrics["value"]],
+                        "from": [metrics["from"] if "from" in metrics else None],
+                        "to": [metrics["to"] if "to" in metrics else None],
+                    }
+                ),
+            ]
+        )
     except Exception as e:
         print(f"Exception: {type(e).__name__} - {e}")
-        
-        
+
+
 def hybrid_metrics_watcher(metric_name: str, value: Union[int, float]) -> None:
     if metric_name.startswith("modin.hybrid.auto"):
         tokens = metric_name.split(".")
@@ -688,35 +694,41 @@ def hybrid_metrics_watcher(metric_name: str, value: Union[int, float]) -> None:
         if len(tokens) == 10:
             submetric = tokens[8]
             group = tokens[9]
-        add_to_hybrid_switch_log({'mode': 'single', 
-                                  'from': from_engine, 
-                                  'to': to_engine, 
-                                  'metric': metric, 
-                                  'submetric': submetric, 
-                                  'group': group, 
-                                  'value': value})
+        add_to_hybrid_switch_log(
+            {
+                "mode": "single",
+                "from": from_engine,
+                "to": to_engine,
+                "metric": metric,
+                "submetric": submetric,
+                "group": group,
+                "value": value,
+            }
+        )
     if metric_name.startswith("modin.hybrid.cast"):
         tokens = metric_name.split(".")
         to_engine = None
         metric = None
         submetric = None
         group = None
-        if len(tokens) == 7 and tokens[3] == 'to' and tokens[5] == 'cost':
+        if len(tokens) == 7 and tokens[3] == "to" and tokens[5] == "cost":
             to_engine = tokens[4]
             group = tokens[6]
-            metric = 'cost'
-        if len(tokens) == 6 and tokens[3] == 'decision':
+            metric = "cost"
+        if len(tokens) == 6 and tokens[3] == "decision":
             submetric = tokens[4]
             group = tokens[5]
-            metric = 'decision'
-        add_to_hybrid_switch_log({'mode': 'merge', 
-                                  'to': to_engine, 
-                                  'metric': metric,
-                                  'submetric': submetric,
-                                  'group': group, 
-                                  'value': value})
-        
-    
+            metric = "decision"
+        add_to_hybrid_switch_log(
+            {
+                "mode": "merge",
+                "to": to_engine,
+                "metric": metric,
+                "submetric": submetric,
+                "group": group,
+                "value": value,
+            }
+        )
 
 
 def connect_modin_telemetry() -> None:
 
@@ -1187,13 +1187,13 @@ def create_ordered_dataframe_from_pandas(
             ]
         ),
     )
-    ordered_df = cache_result(
-        OrderedDataFrame(
-            DataFrameReference(snowpark_df, snowflake_quoted_identifiers),
-            projected_column_snowflake_quoted_identifiers=snowflake_quoted_identifiers,
-            ordering_columns=ordering_columns,
-            row_position_snowflake_quoted_identifier=row_position_snowflake_quoted_identifier,
-        )
+    # TODO: hybrid mode wraps this in cache_result, but that changes query counts everywhere.
+    # The cache_result wrapper is temporarily removed here for the sake of testing.
+    ordered_df = OrderedDataFrame(
+        DataFrameReference(snowpark_df, snowflake_quoted_identifiers),
+        projected_column_snowflake_quoted_identifiers=snowflake_quoted_identifiers,
+        ordering_columns=ordering_columns,
+        row_position_snowflake_quoted_identifier=row_position_snowflake_quoted_identifier,
     )
     # Set the materialized row count
     ordered_df.row_count = df.shape[0]
 
@@ -43,61 +43,3 @@ def __array_function__(self, func: callable, types: tuple, args: tuple, kwargs:
     else:
         # per NEP18 we raise NotImplementedError so that numpy can intercept
         return NotImplemented  # pragma: no cover
-
-'''
-@register_base_override(name="__switcheroo__")
-def __switcheroo__(self, inplace=False, operation=""):
-    if not is_autoswitch_enabled():
-        return self
-    from modin.core.storage_formats.pandas.native_query_compiler import (
-        NativeQueryCompiler,
-    )
-
-    cost_to_move = self._get_query_compiler().move_to_cost(
-        NativeQueryCompiler, "", operation
-    )
-
-    # figure out if this needs to be a standard API
-    cost_to_stay = self._get_query_compiler().stay_cost(
-        NativeQueryCompiler, "", operation
-    )
-
-    # prototype explain
-    import modin.pandas as pd
-
-    row_estimate = SnowflakeQueryCompiler._get_rows(self._get_query_compiler())
-    import inspect
-
-    stack = inspect.stack()
-    frame_before_snowpandas = None
-    location = "<unknown>"
-    for _i, f in enumerate(reversed(stack)):
-        if f.filename is None:
-            continue
-        if "snowpark" in f.filename or "modin" in f.filename:
-            break
-        else:
-            frame_before_snowpandas = f
-    if (
-        frame_before_snowpandas is not None
-        and frame_before_snowpandas.code_context is not None
-    ):
-        location = frame_before_snowpandas.code_context[0].replace("\n", "")
-    pd.add_switcheroo_log(
-        location,
-        operation,
-        "Snowflake",
-        row_estimate,
-        cost_to_stay,
-        cost_to_move,
-        "Pandas" if cost_to_move < cost_to_stay else "Snowflake",
-    )
-
-    if cost_to_move < cost_to_stay:
-        the_new_me_maybe = self.move_to("Pandas", inplace=inplace)
-        if inplace:
-            return self
-        else:
-            return the_new_me_maybe
-    return self
-'''
@@ -2096,10 +2096,6 @@ def describe(
     """
     Generate descriptive statistics.
     """
-    # TODO Remove Switcheroo
-    #self = self.__switcheroo__(inplace=True, operation="describe")
-    #if self.get_backend() != "Snowflake":
-    #    return self.describe(percentiles, include, exclude)
     # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
     percentiles = _refine_percentiles(percentiles)
     data = self