|
14 | 14 | import pytz |
15 | 15 | from minio import Minio |
16 | 16 | from pyiceberg.catalog import load_catalog |
17 | | -from pyiceberg.partitioning import PartitionField, PartitionSpec |
| 17 | +from pyiceberg.partitioning import PartitionField, PartitionSpec, UNPARTITIONED_PARTITION_SPEC |
18 | 18 | from pyiceberg.schema import Schema |
19 | 19 | from pyiceberg.table.sorting import SortField, SortOrder |
20 | 20 | from pyiceberg.transforms import DayTransform, IdentityTransform |
21 | 21 | from pyiceberg.types import ( |
22 | 22 | DoubleType, |
| 23 | + LongType, |
23 | 24 | FloatType, |
24 | 25 | NestedField, |
25 | 26 | StringType, |
26 | 27 | StructType, |
27 | 28 | TimestampType, |
28 | 29 | TimestamptzType |
29 | 30 | ) |
| 31 | +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER |
30 | 32 |
|
31 | 33 | from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm |
32 | 34 | from helpers.config_cluster import minio_secret_key, minio_access_key |
@@ -485,3 +487,86 @@ def test_non_existing_tables(started_cluster): |
485 | 487 | assert "DB::Exception: Table" in str(e) |
486 | 488 | assert "doesn't exist" in str(e) |
487 | 489 |
|
| 490 | + |
| 491 | +def test_cluster_joins(started_cluster): |
| 492 | + node = started_cluster.instances["node1"] |
| 493 | + |
| 494 | + test_ref = f"test_join_tables_{uuid.uuid4()}" |
| 495 | + table_name = f"{test_ref}_table" |
| 496 | + table_name_2 = f"{test_ref}_table_2" |
| 497 | + |
| 498 | + root_namespace = f"{test_ref}_namespace" |
| 499 | + |
| 500 | + catalog = load_catalog_impl(started_cluster) |
| 501 | + catalog.create_namespace(root_namespace) |
| 502 | + |
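| | + # Build two unpartitioned, unsorted Iceberg tables, (tag, name) and (id, second_name), |
| | + # via pyiceberg and load two rows into each through pyarrow. |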
| 503 | + schema = Schema( |
| 504 | + NestedField( |
| 505 | + field_id=1, |
| 506 | + name="tag", |
| 507 | + field_type=LongType(), |
| 508 | + required=False |
| 509 | + ), |
| 510 | + NestedField( |
| 511 | + field_id=2, |
| 512 | + name="name", |
| 513 | + field_type=StringType(), |
| 514 | + required=False, |
| 515 | + ), |
| 516 | + ) |
| 517 | + table = create_table(catalog, root_namespace, table_name, schema, |
| 518 | + partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER) |
| 519 | + data = [{"tag": 1, "name": "John"}, {"tag": 2, "name": "Jack"}] |
| 520 | + df = pa.Table.from_pylist(data) |
| 521 | + table.append(df) |
| 522 | + |
| 523 | + schema2 = Schema( |
| 524 | + NestedField( |
| 525 | + field_id=1, |
| 526 | + name="id", |
| 527 | + field_type=LongType(), |
| 528 | + required=False |
| 529 | + ), |
| 530 | + NestedField( |
| 531 | + field_id=2, |
| 532 | + name="second_name", |
| 533 | + field_type=StringType(), |
| 534 | + required=False, |
| 535 | + ), |
| 536 | + ) |
| 537 | + table2 = create_table(catalog, root_namespace, table_name_2, schema2, |
| 538 | + partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER) |
| 539 | + data = [{"id": 1, "second_name": "Dow"}, {"id": 2, "second_name": "Sparrow"}] |
| 540 | + df = pa.Table.from_pylist(data) |
| 541 | + table2.append(df) |
| 542 | + |
| 543 | + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) |
| 544 | + |
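| | + # Join the two catalog tables; object_storage_cluster_join_mode='local' presumably makes |
| | + # the initiator run the join locally rather than distributing it across the cluster. |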
| 545 | + res = node.query( |
| 546 | + f""" |
| 547 | + SELECT t1.name, t2.second_name |
| 548 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name}` AS t1 |
| 549 | + JOIN {CATALOG_NAME}.`{root_namespace}.{table_name_2}` AS t2 |
| 550 | + ON t1.tag = t2.id |
| 551 | + ORDER BY ALL |
| 552 | + SETTINGS object_storage_cluster_join_mode='local' |
| 553 | + """ |
| 554 | + ) |
| 555 | + |
| 556 | + assert res == "Jack\tSparrow\nJohn\tDow\n" |
| 557 | + |
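| | + # The same setting is exercised for an IN subquery over the second catalog table. |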
| 558 | + res = node.query( |
| 559 | + f""" |
| 560 | + SELECT name |
| 561 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name}` |
| 562 | + WHERE tag IN ( |
| 563 | + SELECT id |
| 564 | + FROM {CATALOG_NAME}.`{root_namespace}.{table_name_2}` |
| 565 | + ) |
| 566 | + ORDER BY ALL |
| 567 | + SETTINGS object_storage_cluster_join_mode='local' |
| 568 | + """ |
| 569 | + ) |
| 570 | + |
| 571 | + assert res == "Jack\nJohn\n" |
| 572 | + |