Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions api/python/test/canary/compiled/group_bys/gcp_ci/dim_listings.v1__0
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
{
"keyColumns": [
"listing_id"
],
"metaData": {
"columnHashes": {
"brief_description": "b0003345a0e958b8b3b7d459b95aa498",
"currency": "9e1e0b5fc5bf5af7380e4d067975ae8a",
"headline": "5e58bf6f73c7186e1dd49fdf99ead6a2",
"inventory_count": "7047981f2be609063042a544e288d1f8",
"is_active": "58a801281a7bda2d527d8dba71d1ea34",
"is_expensive": "9bac4d7297171a854e1508c64b219855",
"is_in_stock": "7abfc73721d835538c41c0a2cdfe06fd",
"listing_id": "e689b0d2de9eec72f1449ed41e5b120e",
"long_description": "6bb2a4f4ab6e29dfa5e9f5a89887bbc9",
"main_image_path": "6937248e639533abf7440733032f56d7",
"merchant_id": "454baf7df2b65cca4203e6cece3ebed2",
"price_cents": "06139ed053aeed0d4c3b786ce27eafbf",
"primary_category": "ac95cd3163de2b74cb5d14c43395e313",
"secondary_image_paths": "a470c5315ebce24e2fb978522c42938e",
"tags": "56055df8ef14828941f4776effed9025",
"weight_grams": "8c6315082571ed2a7da978595ad4a772"
},
"customJson": "{\"airflowDependencies\": [{\"name\": \"wf_data_gcp_exports_dim_listings__0_with_offset_0\", \"spec\": \"data.gcp_exports_dim_listings__0/ds={{ macros.ds_add(ds, 0) }}\"}]}",
"executionInfo": {
"clusterConf": {
"common": {},
"modeClusterConfigs": {
"upload": {
"dataproc.config": "{\"gceClusterConfig\": {\"subnetworkUri\": \"default\", \"serviceAccount\": \"[email protected]\", \"serviceAccountScopes\": [\"https://www.googleapis.com/auth/cloud-platform\", \"https://www.googleapis.com/auth/cloud.useraccounts.readonly\", \"https://www.googleapis.com/auth/devstorage.read_write\", \"https://www.googleapis.com/auth/logging.write\"], \"metadata\": {\"hive-version\": \"3.1.2\", \"SPARK_BQ_CONNECTOR_URL\": \"gs://spark-lib/bigquery/spark-3.5-bigquery-0.42.1.jar\", \"artifact_prefix\": \"gs://zipline-artifacts-canary\"}, \"tags\": []}, \"masterConfig\": {\"numInstances\": 1, \"machineTypeUri\": \"n2-highmem-8\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 1024}}, \"workerConfig\": {\"numInstances\": 2, \"machineTypeUri\": \"n2-highmem-4\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 64, \"numLocalSsds\": 2}}, \"softwareConfig\": {\"imageVersion\": \"2.2.50-debian12\", \"optionalComponents\": [\"FLINK\", \"JUPYTER\"], \"properties\": {}}, \"initializationActions\": [{\"executable_file\": \"gs://zipline-artifacts-canary/scripts/copy_java_security.sh\"}], \"endpointConfig\": {\"enableHttpPortAccess\": true}, \"lifecycleConfig\": {\"idleDeleteTtl\": \"7200s\"}}"
}
}
},
"conf": {
"common": {
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.coalesce.factor": "10",
"spark.chronon.partition.column": "ds",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table_write.format": "iceberg",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.default.parallelism": "10",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.sql.shuffle.partitions": "10"
},
"modeConfigs": {
"backfill": {
"spark.chronon.backfill_cloud_provider": "gcp",
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.coalesce.factor": "10",
"spark.chronon.partition.column": "ds",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table_write.format": "iceberg",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.default.parallelism": "10",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.sql.shuffle.partitions": "10"
}
}
},
"env": {
"common": {
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CLOUD_PROVIDER": "gcp",
"CUSTOMER_ID": "dev",
"ENABLE_PUBSUB": "true",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"FRONTEND_URL": "https://canary-zipline-ui-703996152583.us-central1.run.app",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-canary-cluster",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"HUB_URL": "https://canary-zipline-orchestration-703996152583.us-central1.run.app",
"JOB_MODE": "local[*]",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"VERSION": "latest"
},
"modeEnvironments": {
"upload": {
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CLOUD_PROVIDER": "gcp",
"CUSTOMER_ID": "dev",
"ENABLE_PUBSUB": "true",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"FRONTEND_URL": "https://canary-zipline-ui-703996152583.us-central1.run.app",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-transient-upload-cluster",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"HUB_URL": "https://canary-zipline-orchestration-703996152583.us-central1.run.app",
"JOB_MODE": "local[*]",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"VERSION": "latest"
}
}
},
"historicalBackfill": 0,
"scheduleCron": "@daily"
},
"name": "gcp_ci.dim_listings.v1__0",
"online": 1,
"outputNamespace": "data",
"sourceFile": "group_bys/gcp_ci/dim_listings.py",
"team": "gcp_ci",
"version": "0"
},
"sources": [
{
"entities": {
"query": {
"selects": {
"brief_description": "brief_description",
"currency": "currency",
"headline": "headline",
"inventory_count": "inventory_count",
"is_active": "is_active",
"is_expensive": "IF(price_cents > 10000, 1, 0)",
"is_in_stock": "IF(inventory_count > 0, 1, 0)",
"listing_id": "listing_id",
"long_description": "long_description",
"main_image_path": "main_image_path",
"merchant_id": "merchant_id",
"price_cents": "price_cents",
"primary_category": "primary_category",
"secondary_image_paths": "secondary_image_paths",
"tags": "tags",
"weight_grams": "weight_grams"
},
"startPartition": "2025-01-01"
},
"snapshotTable": "data.gcp_exports_dim_listings__0"
}
}
]
}
149 changes: 149 additions & 0 deletions api/python/test/canary/compiled/group_bys/gcp_ci/dim_merchants.v1__0
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"keyColumns": [
"listing_id"
],
Comment on lines +2 to +4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Key column mismatch: listing_id mapped from merchant_id.

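This is likely the wrong primary key for a merchants dimension: keyColumns declares "listing_id", but the selects entry fills that column from the source column "merchant_id". Name the key "merchant_id" consistently.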

-  "keyColumns": [
-    "listing_id"
-  ],
+  "keyColumns": [
+    "merchant_id"
+  ],
...
-            "listing_id": "merchant_id",
+            "merchant_id": "merchant_id",

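Follow-up: renaming the column changes the schema, so the "columnHashes" entry currently keyed by "listing_id" must be renamed and regenerated. Because this file is compiled output (its sourceFile is group_bys/gcp_ci/dim_merchants.py), prefer fixing the source definition and recompiling over hand-editing the hashes.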

Also applies to: lines 139-142 (the "selects" mapping)

🤖 Prompt for AI Agents
In api/python/test/canary/compiled/group_bys/gcp_ci/dim_merchants.v1__0 around
lines 2-4 (and also lines 139-142), the keyColumns array lists "listing_id" and
the selects mapping aliases the source column "merchant_id" to "listing_id".
Replace "listing_id" with "merchant_id" in keyColumns, change the selects entry
to "merchant_id": "merchant_id", and rename the matching "listing_id" key under
columnHashes so the primary key is consistent across mappings; then regenerate
any dependent hashes/artifacts to reflect the schema change.

"metaData": {
"columnHashes": {
"listing_id": "ada8c9faeabcf9e68d4bb420c462f12c",
"primary_category": "97ce011a93d5d152feca9a6e6ac18cd0"
},
"customJson": "{\"airflowDependencies\": [{\"name\": \"wf_data_gcp_exports_dim_merchants__0_with_offset_0\", \"spec\": \"data.gcp_exports_dim_merchants__0/ds={{ macros.ds_add(ds, 0) }}\"}]}",
"executionInfo": {
"clusterConf": {
"common": {},
"modeClusterConfigs": {
"upload": {
"dataproc.config": "{\"gceClusterConfig\": {\"subnetworkUri\": \"default\", \"serviceAccount\": \"[email protected]\", \"serviceAccountScopes\": [\"https://www.googleapis.com/auth/cloud-platform\", \"https://www.googleapis.com/auth/cloud.useraccounts.readonly\", \"https://www.googleapis.com/auth/devstorage.read_write\", \"https://www.googleapis.com/auth/logging.write\"], \"metadata\": {\"hive-version\": \"3.1.2\", \"SPARK_BQ_CONNECTOR_URL\": \"gs://spark-lib/bigquery/spark-3.5-bigquery-0.42.1.jar\", \"artifact_prefix\": \"gs://zipline-artifacts-canary\"}, \"tags\": []}, \"masterConfig\": {\"numInstances\": 1, \"machineTypeUri\": \"n2-highmem-8\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 1024}}, \"workerConfig\": {\"numInstances\": 2, \"machineTypeUri\": \"n2-highmem-4\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 64, \"numLocalSsds\": 2}}, \"softwareConfig\": {\"imageVersion\": \"2.2.50-debian12\", \"optionalComponents\": [\"FLINK\", \"JUPYTER\"], \"properties\": {}}, \"initializationActions\": [{\"executable_file\": \"gs://zipline-artifacts-canary/scripts/copy_java_security.sh\"}], \"endpointConfig\": {\"enableHttpPortAccess\": true}, \"lifecycleConfig\": {\"idleDeleteTtl\": \"7200s\"}}"
}
}
},
"conf": {
"common": {
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.coalesce.factor": "10",
"spark.chronon.partition.column": "ds",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table_write.format": "iceberg",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.default.parallelism": "10",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.sql.shuffle.partitions": "10"
},
"modeConfigs": {
"backfill": {
"spark.chronon.backfill_cloud_provider": "gcp",
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.coalesce.factor": "10",
"spark.chronon.partition.column": "ds",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table_write.format": "iceberg",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.default.parallelism": "10",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.sql.shuffle.partitions": "10"
}
}
},
"env": {
"common": {
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CLOUD_PROVIDER": "gcp",
"CUSTOMER_ID": "dev",
"ENABLE_PUBSUB": "true",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"FRONTEND_URL": "https://canary-zipline-ui-703996152583.us-central1.run.app",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-canary-cluster",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"HUB_URL": "https://canary-zipline-orchestration-703996152583.us-central1.run.app",
"JOB_MODE": "local[*]",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"VERSION": "latest"
},
"modeEnvironments": {
"upload": {
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CLOUD_PROVIDER": "gcp",
"CUSTOMER_ID": "dev",
"ENABLE_PUBSUB": "true",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"FRONTEND_URL": "https://canary-zipline-ui-703996152583.us-central1.run.app",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-transient-upload-cluster",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"HUB_URL": "https://canary-zipline-orchestration-703996152583.us-central1.run.app",
"JOB_MODE": "local[*]",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"VERSION": "latest"
}
}
},
"historicalBackfill": 0,
"scheduleCron": "@daily"
},
"name": "gcp_ci.dim_merchants.v1__0",
"online": 1,
"outputNamespace": "data",
"sourceFile": "group_bys/gcp_ci/dim_merchants.py",
"team": "gcp_ci",
"version": "0"
},
"sources": [
{
"entities": {
"query": {
"selects": {
"listing_id": "merchant_id",
"primary_category": "primary_category"
},
"startPartition": "2025-01-01"
},
"snapshotTable": "data.gcp_exports_dim_merchants__0"
}
}
]
}
Loading