@@ -413,7 +413,8 @@ def test_two_same_backend(pico_df):

 def test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):
     with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
-        df3 = pd.concat([pico_df, cluster_df], axis=1)
+        # We have to copy the input dataframes because of inplace merging
+        df3 = pd.concat([pico_df.copy(), cluster_df.copy()], axis=1)
     assert pico_df.get_backend() == "Pico"
     assert cluster_df.get_backend() == "Cluster"
     assert df3.get_backend() == "Cluster"  # result should be on cluster
@@ -431,7 +432,10 @@ def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
     register_pd_accessor(name="concat", backend="Cluster")(
         lambda *args, **kwargs: "custom_concat_result"
     )
-    assert pd.concat([pico_df, cluster_df], axis=1) == "custom_concat_result"
+    # copy dataframes for concat to allow for in-place merging
+    assert (
+        pd.concat([pico_df.copy(), cluster_df.copy()], axis=1) == "custom_concat_result"
+    )
     assert pico_df.get_backend() == "Pico"
     assert cluster_df.get_backend() == "Cluster"

@@ -449,14 +453,14 @@ def test_moving_pico_to_cluster_in_place_calls_set_backend_only_once_github_issu

 def test_cast_to_second_backend_with___init__(pico_df, cluster_df):
     df3 = pd.DataFrame({"pico": pico_df.iloc[:, 0], "cluster": cluster_df.iloc[:, 0]})
-    assert pico_df.get_backend() == "Pico"
+    assert pico_df.get_backend() == "Cluster"  # pico_df was cast inplace
     assert cluster_df.get_backend() == "Cluster"
     assert df3.get_backend() == "Cluster"  # result should be on cluster


 def test_cast_to_first_backend(pico_df, cluster_df):
     df3 = pd.concat([cluster_df, pico_df], axis=1)
-    assert pico_df.get_backend() == "Pico"
+    assert pico_df.get_backend() == "Cluster"  # pico_df was cast in place
     assert cluster_df.get_backend() == "Cluster"
     assert df3.get_backend() == cluster_df.get_backend()  # result should be on cluster

@@ -468,7 +472,7 @@ def test_cast_to_first_backend_with_concat_uses_first_backend_api_override(
         lambda *args, **kwargs: "custom_concat_result"
     )
     assert pd.concat([cluster_df, pico_df], axis=1) == "custom_concat_result"
-    assert pico_df.get_backend() == "Pico"
+    assert pico_df.get_backend() == "Cluster"  # pico was cast inplace to cluster
     assert cluster_df.get_backend() == "Cluster"


@@ -479,7 +483,7 @@ def test_cast_to_first_backend_with___init__(pico_df, cluster_df):
             "pico": pico_df.iloc[:, 0],
         }
     )
-    assert pico_df.get_backend() == "Pico"
+    assert pico_df.get_backend() == "Cluster"  # pico_df was cast in place to Cluster
     assert cluster_df.get_backend() == "Cluster"
     assert df3.get_backend() == "Cluster"  # result should be on cluster

@@ -557,31 +561,33 @@ def test_two_two_qc_types_default_rhs(default_df, cluster_df):
     # so we default to the caller
     df3 = pd.concat([default_df, cluster_df], axis=1)
     assert default_df.get_backend() == "Test_casting_default"
-    assert cluster_df.get_backend() == "Cluster"
+    assert (
+        cluster_df.get_backend() == "Test_casting_default"
+    )  # in place cast to default
     assert df3.get_backend() == default_df.get_backend()  # should move to default


 def test_two_two_qc_types_default_lhs(default_df, cluster_df):
     # none of the query compilers know about each other here
     # so we default to the caller
     df3 = pd.concat([cluster_df, default_df], axis=1)
-    assert default_df.get_backend() == "Test_casting_default"
+    assert default_df.get_backend() == "Cluster"  # in place cast to Cluster
     assert cluster_df.get_backend() == "Cluster"
     assert df3.get_backend() == cluster_df.get_backend()  # should move to cluster


 def test_two_two_qc_types_default_2_rhs(default_df, cloud_df):
     # cloud knows a bit about costing; so we prefer moving to there
     df3 = pd.concat([default_df, cloud_df], axis=1)
-    assert default_df.get_backend() == "Test_casting_default"
+    assert default_df.get_backend() == "Cloud"  # inplace cast to Cloud
     assert cloud_df.get_backend() == "Cloud"
     assert df3.get_backend() == cloud_df.get_backend()  # should move to cloud


 def test_two_two_qc_types_default_2_lhs(default_df, cloud_df):
     # cloud knows a bit about costing; so we prefer moving to there
     df3 = pd.concat([cloud_df, default_df], axis=1)
-    assert default_df.get_backend() == "Test_casting_default"
+    assert default_df.get_backend() == "Cloud"  # inplace cast to Cloud
     assert cloud_df.get_backend() == "Cloud"
     assert df3.get_backend() == cloud_df.get_backend()  # should move to cloud

@@ -651,6 +657,18 @@ def test_qc_mixed_loc(pico_df, cloud_df):
     assert cloud_df1[pico_df1[0][0]][pico_df1[0][1]] == 1


+def test_merge_in_place(default_df, lazy_df, cloud_df):
+    # lazy_df tries to pawn off work on other engines
+    df = default_df.merge(lazy_df)
+    assert type(df) is type(default_df)
+    # Both arguments now have the same qc type
+    assert type(lazy_df) is type(default_df)
+
+    df = cloud_df.merge(lazy_df)
+    assert type(df) is type(cloud_df)
+    assert type(lazy_df) is type(cloud_df)
+
+
 def test_information_asymmetry(default_df, cloud_df, eager_df, lazy_df):
     # normally, the default query compiler should be chosen
     # here, but since eager knows about default, but not