|
14 | 14 | import pytz |
15 | 15 | from minio import Minio |
16 | 16 | from pyiceberg.catalog import load_catalog |
17 | | -from pyiceberg.partitioning import PartitionField, PartitionSpec |
| 17 | +from pyiceberg.partitioning import PartitionField, PartitionSpec, UNPARTITIONED_PARTITION_SPEC |
18 | 18 | from pyiceberg.schema import Schema |
19 | 19 | from pyiceberg.table.sorting import SortField, SortOrder |
20 | 20 | from pyiceberg.transforms import DayTransform, IdentityTransform |
21 | 21 | from pyiceberg.types import ( |
22 | 22 | DoubleType, |
| 23 | + LongType, |
23 | 24 | FloatType, |
24 | 25 | NestedField, |
25 | 26 | StringType, |
26 | 27 | StructType, |
27 | 28 | TimestampType, |
28 | 29 | TimestamptzType |
29 | 30 | ) |
| 31 | +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER |
30 | 32 |
|
31 | 33 | from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm |
32 | 34 | from helpers.config_cluster import minio_secret_key, minio_access_key |
@@ -485,3 +487,86 @@ def test_non_existing_tables(started_cluster): |
485 | 487 | assert "DB::Exception: Table" in str(e) |
486 | 488 | assert "doesn't exist" in str(e) |
487 | 489 |
|
| 490 | + |
| 491 | +def test_cluster_joins(started_cluster): |
| 492 | + node = started_cluster.instances["node1"] |
| 493 | + |
| 494 | + test_ref = f"test_join_tables_{uuid.uuid4()}" |
| 495 | + table_name = f"{test_ref}_table" |
| 496 | + table_name_2 = f"{test_ref}_table_2" |
| 497 | + |
| 498 | + root_namespace = f"{test_ref}_namespace" |
| 499 | + |
| 500 | + catalog = load_catalog_impl(started_cluster) |
| 501 | + catalog.create_namespace(root_namespace) |
| 502 | + |
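| | + # Build two unpartitioned, unsorted Iceberg tables, (tag, name) and (id, second_name), |
| | + # via pyiceberg and load two rows into each through pyarrow. |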
| 503 | + schema = Schema( |
| 504 | + NestedField( |
| 505 | + field_id=1, |
| 506 | + name="tag", |
| 507 | + field_type=LongType(), |
| 508 | + required=False |
| 509 | + ), |
| 510 | + NestedField( |
| 511 | + field_id=2, |
| 512 | + name="name", |
| 513 | + field_type=StringType(), |
| 514 | + required=False, |
| 515 | + ), |
| 516 | + ) |
| 517 | + table = create_table(catalog, root_namespace, table_name, schema, |
| 518 | + partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER) |
| 519 | + data = [{"tag": 1, "name": "John"}, {"tag": 2, "name": "Jack"}] |
| 520 | + df = pa.Table.from_pylist(data) |
| 521 | + table.append(df) |
| 522 | + |
| 523 | + schema2 = Schema( |
| 524 | + NestedField( |
| 525 | + field_id=1, |
| 526 | + name="id", |
| 527 | + field_type=LongType(), |
| 528 | + required=False |
| 529 | + ), |
| 530 | + NestedField( |
| 531 | + field_id=2, |
| 532 | + name="second_name", |
| 533 | + field_type=StringType(), |
| 534 | + required=False, |
| 535 | + ), |
| 536 | + ) |
| 537 | + table2 = create_table(catalog, root_namespace, table_name_2, schema2, |
| 538 | + partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER) |
| 539 | + data = [{"id": 1, "second_name": "Dow"}, {"id": 2, "second_name": "Sparrow"}] |
| 540 | + df = pa.Table.from_pylist(data) |
| 541 | + table2.append(df) |
| 542 | + |
| 543 | + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) |
| 544 | + |
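| | + # Join the two catalog tables; object_storage_cluster_join_mode='local' presumably makes |
| | + # the initiator run the join locally rather than distributing it across the cluster. |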
| 545 | + res = node.query( |
| 546 | + f""" |
| 547 | + SELECT t1.name, t2.second_name |
| 548 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name}` AS t1 |
| 549 | + JOIN {CATALOG_NAME}.`{root_namespace}.{table_name_2}` AS t2 |
| 550 | + ON t1.tag = t2.id |
| 551 | + ORDER BY ALL |
| 552 | + SETTINGS object_storage_cluster_join_mode='local' |
| 553 | + """ |
| 554 | + ) |
| 555 | + |
| 556 | + assert res == "Jack\tSparrow\nJohn\tDow\n" |
| 557 | + |
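| | + # The same setting is exercised for an IN subquery over the second catalog table. |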
| 558 | + res = node.query( |
| 559 | + f""" |
| 560 | + SELECT name |
| 561 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name}` |
| 562 | + WHERE tag IN ( |
| 563 | + SELECT id |
| 564 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name_2}` |
| 565 | + ) |
| 566 | + ORDER BY ALL |
| 567 | + SETTINGS object_storage_cluster_join_mode='local' |
| 568 | + """ |
| 569 | + ) |
| 570 | + |
| 571 | + assert res == "Jack\nJohn\n" |
| 572 | + |