Skip to content

Commit 6f0ff79

Browse files
FIX-#4023: Fall back to pandas in case of MultiIndex columns (#5149)
Signed-off-by: Andrey Pavlenko <[email protected]>
1 parent f492ba9 commit 6f0ff79

File tree

3 files changed

+34
-9
lines changed

3 files changed

+34
-9
lines changed

modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2385,14 +2385,22 @@ def from_pandas(cls, df):
23852385
"""
23862386
new_index = df.index
23872387
new_columns = df.columns
2388+
2389+
if isinstance(new_columns, MultiIndex):
2390+
# MultiIndex columns are not supported by the HDK backend.
2391+
# We just print this warning here and fall back to pandas.
2392+
index_cols = None
2393+
ErrorMessage.single_warning(
2394+
"MultiIndex columns are not currently supported by the HDK backend."
2395+
)
23882396
# If there is non-trivial index, we put it into columns.
23892397
# If the index is trivial, but there are no columns, we put
23902398
# it into columns as well because, otherwise, we don't know
23912399
# the number of rows and are thus unable to restore the index.
23922400
# That's what we usually have for arrow tables and execution
23932401
# result. Unnamed index is renamed to __index__. Also all
23942402
# columns get 'F_' prefix to handle names unsupported in HDK.
2395-
if len(new_index) == 0 or (
2403+
elif len(new_index) == 0 or (
23962404
len(new_columns) != 0 and cls._is_trivial_index(new_index)
23972405
):
23982406
index_cols = None

modin/experimental/core/execution/native/implementations/hdk_on_native/partitioning/partition_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ def _get_unsupported_cols(cls, obj):
163163

164164
if obj.empty:
165165
unsupported_cols = []
166+
elif isinstance(obj.columns, pandas.MultiIndex):
167+
unsupported_cols = [str(c) for c in obj.columns]
166168
else:
167169
cols = [name for name, col in obj.dtypes.items() if col == "object"]
168170
type_samples = obj.iloc[0][cols]

modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -447,10 +447,7 @@ def applier(lib):
447447
eval_general(pd, pandas, applier)
448448

449449
@pytest.mark.parametrize("is_multiindex", [True, False])
450-
@pytest.mark.parametrize(
451-
"column_names", [None, ["level1", None], ["level1", "level2"]]
452-
)
453-
def test_reset_index_multicolumns(self, is_multiindex, column_names):
450+
def test_reset_index_multicolumns(self, is_multiindex):
454451
index = (
455452
pandas.MultiIndex.from_tuples(
456453
[(i, j, k) for i in range(2) for j in range(3) for k in range(4)],
@@ -459,9 +456,6 @@ def test_reset_index_multicolumns(self, is_multiindex, column_names):
459456
if is_multiindex
460457
else pandas.Index(np.arange(1, len(self.data["a"]) + 1), name="index")
461458
)
462-
columns = pandas.MultiIndex.from_tuples(
463-
[("a", "b"), ("b", "c")], names=column_names
464-
)
465459
data = np.array(list(self.data.values())).T
466460

467461
def applier(df, **kwargs):
@@ -471,7 +465,7 @@ def applier(df, **kwargs):
471465
run_and_compare(
472466
fn=applier,
473467
data=data,
474-
constructor_kwargs={"index": index, "columns": columns},
468+
constructor_kwargs={"index": index},
475469
)
476470

477471
def test_set_index_name(self):
@@ -498,6 +492,27 @@ def test_set_index_names(self):
498492

499493
df_equals(pandas_df, modin_df)
500494

495+
def test_rename(self):
496+
index = pandas.MultiIndex.from_tuples(
497+
[("foo1", "bar1"), ("foo2", "bar2")], names=["foo", "bar"]
498+
)
499+
columns = pandas.MultiIndex.from_tuples(
500+
[("fizz1", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"]
501+
)
502+
503+
def rename(df, **kwargs):
504+
return df.rename(
505+
index={"foo1": "foo3", "bar2": "bar3"},
506+
columns={"fizz1": "fizz3", "buzz2": "buzz3"},
507+
)
508+
509+
run_and_compare(
510+
fn=rename,
511+
data=[(0, 0), (1, 1)],
512+
constructor_kwargs={"index": index, "columns": columns},
513+
force_lazy=False,
514+
)
515+
501516

502517
class TestFillna:
503518
data = {"a": [1, 1, None], "b": [None, None, 2], "c": [3, None, None]}

0 commit comments

Comments
 (0)