Skip to content

Commit dcaa489

Browse files
committed
Test fixes for in-place modification
1 parent d1533d7 commit dcaa489

File tree

2 files changed

+28
-12
lines changed

2 files changed

+28
-12
lines changed

modin/core/storage_formats/pandas/query_compiler_caster.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,8 +1157,6 @@ def cast_to_qc(arg):
11571157
original_qc,
11581158
new_castable,
11591159
) in inplace_update_trackers:
1160-
new_qc = new_castable._get_query_compiler()
1161-
#if original_qc is not new_qc:
11621160
new_castable._copy_into(original_castable)
11631161

11641162
return _maybe_switch_backend_post_op(

modin/tests/pandas/native_df_interoperability/test_compiler_caster.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ def test_two_same_backend(pico_df):
413413

414414
def test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):
415415
with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
416-
df3 = pd.concat([pico_df, cluster_df], axis=1)
416+
# We have to copy the input dataframes because of in-place merging
417+
df3 = pd.concat([pico_df.copy(), cluster_df.copy()], axis=1)
417418
assert pico_df.get_backend() == "Pico"
418419
assert cluster_df.get_backend() == "Cluster"
419420
assert df3.get_backend() == "Cluster" # result should be on cluster
@@ -431,7 +432,10 @@ def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
431432
register_pd_accessor(name="concat", backend="Cluster")(
432433
lambda *args, **kwargs: "custom_concat_result"
433434
)
434-
assert pd.concat([pico_df, cluster_df], axis=1) == "custom_concat_result"
435+
# copy dataframes for concat to allow for in-place merging
436+
assert (
437+
pd.concat([pico_df.copy(), cluster_df.copy()], axis=1) == "custom_concat_result"
438+
)
435439
assert pico_df.get_backend() == "Pico"
436440
assert cluster_df.get_backend() == "Cluster"
437441

@@ -449,14 +453,14 @@ def test_moving_pico_to_cluster_in_place_calls_set_backend_only_once_github_issu
449453

450454
def test_cast_to_second_backend_with___init__(pico_df, cluster_df):
451455
df3 = pd.DataFrame({"pico": pico_df.iloc[:, 0], "cluster": cluster_df.iloc[:, 0]})
452-
assert pico_df.get_backend() == "Pico"
456+
assert pico_df.get_backend() == "Cluster"  # pico_df was cast in place
453457
assert cluster_df.get_backend() == "Cluster"
454458
assert df3.get_backend() == "Cluster" # result should be on cluster
455459

456460

457461
def test_cast_to_first_backend(pico_df, cluster_df):
458462
df3 = pd.concat([cluster_df, pico_df], axis=1)
459-
assert pico_df.get_backend() == "Pico"
463+
assert pico_df.get_backend() == "Cluster" # pico_df was cast in place
460464
assert cluster_df.get_backend() == "Cluster"
461465
assert df3.get_backend() == cluster_df.get_backend() # result should be on cluster
462466

@@ -468,7 +472,7 @@ def test_cast_to_first_backend_with_concat_uses_first_backend_api_override(
468472
lambda *args, **kwargs: "custom_concat_result"
469473
)
470474
assert pd.concat([cluster_df, pico_df], axis=1) == "custom_concat_result"
471-
assert pico_df.get_backend() == "Pico"
475+
assert pico_df.get_backend() == "Cluster"  # pico_df was cast in place to Cluster
472476
assert cluster_df.get_backend() == "Cluster"
473477

474478

@@ -479,7 +483,7 @@ def test_cast_to_first_backend_with___init__(pico_df, cluster_df):
479483
"pico": pico_df.iloc[:, 0],
480484
}
481485
)
482-
assert pico_df.get_backend() == "Pico"
486+
assert pico_df.get_backend() == "Cluster"  # pico_df was cast in place
483487
assert cluster_df.get_backend() == "Cluster"
484488
assert df3.get_backend() == "Cluster" # result should be on cluster
485489

@@ -557,31 +561,33 @@ def test_two_two_qc_types_default_rhs(default_df, cluster_df):
557561
# so we default to the caller
558562
df3 = pd.concat([default_df, cluster_df], axis=1)
559563
assert default_df.get_backend() == "Test_casting_default"
560-
assert cluster_df.get_backend() == "Cluster"
564+
assert (
565+
cluster_df.get_backend() == "Test_casting_default"
566+
) # in place cast to default
561567
assert df3.get_backend() == default_df.get_backend() # should move to default
562568

563569

564570
def test_two_two_qc_types_default_lhs(default_df, cluster_df):
565571
# none of the query compilers know about each other here
566572
# so we default to the caller
567573
df3 = pd.concat([cluster_df, default_df], axis=1)
568-
assert default_df.get_backend() == "Test_casting_default"
574+
assert default_df.get_backend() == "Cluster" # in place cast to Cluster
569575
assert cluster_df.get_backend() == "Cluster"
570576
assert df3.get_backend() == cluster_df.get_backend() # should move to cluster
571577

572578

573579
def test_two_two_qc_types_default_2_rhs(default_df, cloud_df):
574580
# cloud knows a bit about costing; so we prefer moving to there
575581
df3 = pd.concat([default_df, cloud_df], axis=1)
576-
assert default_df.get_backend() == "Test_casting_default"
582+
assert default_df.get_backend() == "Cloud" # inplace cast to Cloud
577583
assert cloud_df.get_backend() == "Cloud"
578584
assert df3.get_backend() == cloud_df.get_backend() # should move to cloud
579585

580586

581587
def test_two_two_qc_types_default_2_lhs(default_df, cloud_df):
582588
# cloud knows a bit about costing; so we prefer moving to there
583589
df3 = pd.concat([cloud_df, default_df], axis=1)
584-
assert default_df.get_backend() == "Test_casting_default"
590+
assert default_df.get_backend() == "Cloud" # inplace cast to Cloud
585591
assert cloud_df.get_backend() == "Cloud"
586592
assert df3.get_backend() == cloud_df.get_backend() # should move to cloud
587593

@@ -651,6 +657,18 @@ def test_qc_mixed_loc(pico_df, cloud_df):
651657
assert cloud_df1[pico_df1[0][0]][pico_df1[0][1]] == 1
652658

653659

660+
def test_merge_in_place(default_df, lazy_df, cloud_df):
661+
# lazy_df tries to pawn off work on other engines
662+
df = default_df.merge(lazy_df)
663+
assert type(df) is type(default_df)
664+
# Both arguments now have the same qc type
665+
assert type(lazy_df) is type(default_df)
666+
667+
df = cloud_df.merge(lazy_df)
668+
assert type(df) is type(cloud_df)
669+
assert type(lazy_df) is type(cloud_df)
670+
671+
654672
def test_information_asymmetry(default_df, cloud_df, eager_df, lazy_df):
655673
# normally, the default query compiler should be chosen
656674
# here, but since eager knows about default, but not

0 commit comments

Comments
 (0)