3 changes: 2 additions & 1 deletion .gitignore
@@ -159,4 +159,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

-service-account.json
+service-account.json
+upgrade-test*
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1 +1 @@
-FROM quay.io/astronomer/astro-runtime:12.7.0
+FROM quay.io/astronomer/astro-runtime:12.8.0
87 changes: 51 additions & 36 deletions config.yaml
@@ -1,56 +1,71 @@
gateways:
-  public-demo: # this will use the config in tcloud.yaml for state_connection
-    scheduler: # TODO: add the connection info below into the Tobiko Cloud Connections Page with the service account json in plain text for this scheduler
-      type: cloud
-      default_catalog: sqlmesh-public-demo
-      connection: # This connection is used for automatic unit test generation and external model creation
-        type: bigquery
-        method: service-account-json
-        concurrent_tasks: 5
-        register_comments: true
-        keyfile_json: {{ env_var('GOOGLE_SQLMESH_CREDENTIALS') }}
-        project: sqlmesh-public-demo
-  postgres:
-    connection:
-      type: postgres
-      host: {{ env_var('SQLMESH_STATE_HOST') }}
-      port: 5432
-      user: {{ env_var('SQLMESH_STATE_USERNAME') }}
-      password: {{ env_var('SQLMESH_STATE_PASSWORD') }}
-      database: sqlmesh_state_demo
+  public-demo: # this will use the config in tcloud.yaml for state_connection
+    scheduler: # TODO: add the connection info below into the Tobiko Cloud Connections Page with the service account json in plain text for this scheduler
+      type: cloud
+      default_catalog: sqlmesh-public-demo
+      connection: # This connection is used for automatic unit test generation, external model creation, and table diffing
+        type: bigquery
+        method: service-account-json
+        concurrent_tasks: 5
+        register_comments: true
+        keyfile_json: {{ env_var('GOOGLE_SQLMESH_CREDENTIALS') }}
+        project: sqlmesh-public-demo
+  # postgres:
+  #   connection:
+  #     type: postgres
+  #     host: {{ env_var('SQLMESH_STATE_HOST') }}
+  #     port: 5432
+  #     user: {{ env_var('SQLMESH_STATE_USERNAME') }}
+  #     password: {{ env_var('SQLMESH_STATE_PASSWORD') }}
+  #     database: sqlmesh_state_demo

# Uncomment this line to use the default target environment when running
# tcloud sqlmesh plan
# export USER=<your name>
# it will resolve to: tcloud sqlmesh plan dev_USER
# default_target_environment: dev_{{ env_var('USER','sung') }}

default_gateway: public-demo
project: repo_a

model_defaults:
-  dialect: bigquery
-  start: 2024-12-01
+  dialect: bigquery
+  start: 2024-12-01
+  # cron: '@hourly'

# Uncomment this line to prevent plans being applied to prod during development
# allow_prod_deploy: {{ env_var('ALLOW_PROD_DEPLOY', 'false') }}

# Linting rules to enforce standards for your team
linter:
-  enabled: true
-  # error if ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"] rules violated
-  rules: ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"]
-  # warn_rules: ["noselectstar"]
-  # ignored_rules: ["noselectstar"]
+  enabled: true
+  # error if ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"] rules violated
+  rules: ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"]
+  # warn_rules: ["noselectstar"]
+  # ignored_rules: ["noselectstar"]

# enables synchronized deployments to prod when a PR is merged
cicd_bot:
-  type: github
-  merge_method: squash
-  enable_deploy_command: true
-  skip_pr_backfill: false
-  auto_categorize_changes:
-    external: full
-    python: full
-    sql: full
-    seed: full
+  type: github
+  merge_method: squash
+  enable_deploy_command: false
+  skip_pr_backfill: false
+  auto_categorize_changes:
+    external: full
+    python: full
+    sql: full
+    seed: full

# Uncomment the following lines to enable auto mode for faster development
plan:
  enable_preview: true
  # no_diff: true
  # use_finalized_state: true
  # no_prompts: true
  # auto_apply: true

# list of users that are allowed to approve PRs for synchronized deployments
# When a user clicks "approve" on a PR, the PR will be automatically deployed to the target environment and merged into the main branch
users:
  - username: sung_sqlmesh_demo
    github_username: sungchun12
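Note: a minimal sketch of the local setup this config expects, assuming the tcloud CLI is run from the repo root — the variable names come from the env_var() calls above; the values are placeholders:

    export GOOGLE_SQLMESH_CREDENTIALS='<service account json as a single line>'
    export USER=<your name>   # only needed if default_target_environment is uncommented
    tcloud sqlmesh plan       # resolves to the dev_<your name> environment when default_target_environment is enabled
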
3 changes: 3 additions & 0 deletions external_models.yaml
@@ -1,4 +1,7 @@
- name: '`bigquery-public-data`.`ga4_obfuscated_sample_ecommerce`.`events_20210131`'
+  audits:
+    - name: not_null
+      columns: "[event_date]"
  columns:
    event_date: STRING
    event_timestamp: INT64
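Note: the schema entries above are generated from the warehouse metadata rather than written by hand — as the comment removed from models/examples/external_model.sql below puts it, assuming the BigQuery gateway is reachable:

    tcloud sqlmesh create_external_models
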
3 changes: 2 additions & 1 deletion models/examples/blueprint_demo.sql
@@ -9,6 +9,7 @@ MODEL (
    (customer := customer4, paid_field := customer_persona, customer_filter := 4),
    (customer := customer5, paid_field := customer_persona, customer_filter := 5)
  ),
+  grain (customer_id, order_id),
  audits (
    unique_combination_of_columns(columns := (customer_id, order_id)),
    NOT_NULL(columns := (customer_id, order_id))
@@ -24,6 +25,6 @@ SELECT
  purchase_date,
  country,
  @{paid_field} AS customer_persona /* use `@{variable}` syntax to make sqlmesh interpret the variable as a column */
-FROM demo.seed_ecommerce
+FROM tcloud_demo.seed_ecommerce
WHERE
  customer_id = @customer_filter::INT64 /* we do `WHERE @condition` vs. `FROM @condition` to respect the AST */
1 change: 0 additions & 1 deletion models/examples/external_model.sql
@@ -2,7 +2,6 @@ MODEL (
  name tcloud_demo.external_model
);

-/* run this to create the schema file from the table's metadata: sqlmesh create_external_models */
SELECT
event_date,
event_timestamp,
2 changes: 1 addition & 1 deletion models/incremental_events.sql
@@ -30,4 +30,4 @@ SELECT
  IF(event_name = 'blog_view', 'high', 'low') AS user_intent_level
FROM sqlmesh-public-demo.tcloud_raw_data.raw_events
WHERE
-  event_timestamp BETWEEN @start_ds AND @end_ds;
+  event_timestamp BETWEEN @start_ds AND @end_ds;
5 changes: 4 additions & 1 deletion models/incremental_events_allow_partials.sql
@@ -17,7 +17,10 @@ MODEL (
    NOT_NULL(columns = (
      event_id
    ))
-  )
+  ),
+  signals [
+    elt_sync(upstream_ref:=tcloud_demo.stg_orders, downstream_ref:= tcloud_demo.orders, date_column:=order_date)
+  ]
);

/* How to work with incremental forward only models */
/* step 1: `sqlmesh plan dev` to create this model for the first time and backfill for all of history */
/* step 2: change the user_intent_level conditional value */
/* step 3: pick a start date to backfill like: '2024-06-18' */
/* step 4: validate only a portion of rows were backfilled: sqlmesh fetchdf "select * from tcloud_demo__dev.incremental_events" */
/* step 5: `sqlmesh plan` to promote to prod with a virtual update, note: the dev backfill preview won't be reused for promotion and is only for dev purposes */
/* step 6: sqlmesh plan --restate-model "tcloud_demo.incremental_events", to invoke a backfill to mirror dev's data preview */
/* step 7: pick the same backfill start date for prod as dev's above: '2024-06-18' */
/* step 8: validate changes to prod: sqlmesh fetchdf "select * from tcloud_demo.incremental_events" */
/* Note: by default, only complete intervals are processed, so if today was 2024-06-21 and the day isn't over, it would NOT backfill the day's interval of data because it's not complete */
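
The same dev-to-prod workflow as a command sequence (taken from the steps above; '2024-06-18' is the example backfill start date from the comment):

    sqlmesh plan dev                                                      # steps 1-3
    sqlmesh fetchdf "select * from tcloud_demo__dev.incremental_events"   # step 4
    sqlmesh plan                                                          # step 5
    sqlmesh plan --restate-model "tcloud_demo.incremental_events"         # steps 6-7
    sqlmesh fetchdf "select * from tcloud_demo.incremental_events"        # step 8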
2 changes: 1 addition & 1 deletion models/seed_ecommerce.sql
@@ -1,5 +1,5 @@
MODEL (
-  name demo.seed_ecommerce,
+  name tcloud_demo.seed_ecommerce,
  kind SEED (
    path '../seeds/ecommerce.csv'
  ),
4 changes: 2 additions & 2 deletions models/staging/stg_payments.sql
@@ -13,7 +13,7 @@ SELECT
  id AS payment_id,
  order_id,
  payment_method,
-  'advanced_cll_column' AS advanced_cll_column,
+  'advanced_cll_column' AS advanced_cll_column, /* Tobiko Cloud only feature */
  amount / 100 AS amount, /* `amount` is currently stored in cents, so we convert it to dollars */
-  '3' AS new_column_demos /* non-breaking change example */
+  'hello' AS new_column_demos, /* non-breaking change example */
FROM tcloud_demo.seed_raw_payments
Empty file added packages.txt
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
tcloud
-tobiko-cloud-scheduler-facade[airflow,dagster]==1.0.2
+tobiko-cloud-scheduler-facade[airflow,dagster]==1.0.7
pre-commit
dagster
dagster-webserver
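Note: a minimal install sketch — tcloud.yaml sets pip_executable: uv pip, so either form should work (assuming uv is installed):

    uv pip install -r requirements.txt   # or: pip install -r requirements.txt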
18 changes: 18 additions & 0 deletions signals/elt_sync.py
@@ -0,0 +1,18 @@
import typing as t

from sqlmesh import signal, DatetimeRanges, ExecutionContext
from sqlglot import exp

# add the context argument to your function
@signal()
def elt_sync(batch: DatetimeRanges, context: ExecutionContext, upstream_ref: exp.Table, downstream_ref: exp.Table, date_column: exp.Column) -> bool:

    upstream_max_date = context.engine_adapter.fetchdf(f"SELECT max({date_column}) from {upstream_ref}").iloc[0, 0]
    current_max_date_this_model = context.engine_adapter.fetchdf(f"SELECT max({date_column}) from {downstream_ref}").iloc[0, 0]

    if upstream_max_date > current_max_date_this_model:
        print(f'Upstream ref has more date intervals than this model. Triggering run for: {downstream_ref}')
        return True
    else:
        print(f'Upstream ref does not have more date intervals than this model. Not triggering run for: {downstream_ref}')
        return False
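
This signal is attached in models/incremental_events_allow_partials.sql via the new signals [...] block; assuming the standard scheduling flow, it is evaluated when the scheduler checks whether the model's intervals are ready to run:

    tcloud sqlmesh run   # elt_sync returns False (skip) until the upstream ref has newer data than the model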
12 changes: 6 additions & 6 deletions tcloud.yaml
@@ -1,8 +1,8 @@
projects:
-  public-demo:
-    url: https://cloud.tobikodata.com/sqlmesh/tobiko/public-demo/
-    gateway: public-demo
-    extras: bigquery,web,github,postgres
-    pip_executable: uv pip
-default_project: public-demo
+  prod:
+    url: https://cloud.tobikodata.com/sqlmesh/tobiko/public-demo/
+    gateway: public-demo # gateway name in config.yaml
+    extras: bigquery,lsp,github
+    pip_executable: uv pip
+default_project: prod
# you can alias the tcloud cli in your shell: alias sqlmesh='tcloud sqlmesh'
39 changes: 39 additions & 0 deletions tcloud_config_templates/config_bigquery.yaml
@@ -0,0 +1,39 @@
gateways:
  tobiko_cloud: # this will use the config in tcloud.yaml for state_connection
    scheduler: # TODO: add the connection info below into the Tobiko Cloud Connections Page with the service account json in plain text for this scheduler
      type: cloud
      default_catalog: sqlmesh-public-demo
      connection: # This connection is used for automatic unit test generation and external model creation
        type: bigquery
        method: service-account-json
        concurrent_tasks: 5
        register_comments: true
        keyfile_json: {{ env_var('GOOGLE_SQLMESH_CREDENTIALS') }} # export GOOGLE_SQLMESH_CREDENTIALS=<contents of the service account json file as a single line in plain text>
        project: sqlmesh-public-demo

default_gateway: tobiko_cloud

model_defaults:
  dialect: bigquery
  start: 2024-12-01
  cron: '@daily'

linter:
  enabled: true
  # error if ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"] rules violated
  rules: ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"]
  # warn_rules: ["noselectstar"]
  # ignored_rules: ["noselectstar"]

# enables synchronized deployments to prod when a PR is merged
cicd_bot:
  type: github
  merge_method: squash
  enable_deploy_command: true
  skip_pr_backfill: false
  auto_categorize_changes:
    external: full
    python: full
    sql: full
    seed: full
38 changes: 38 additions & 0 deletions tcloud_config_templates/config_redshift.yaml
@@ -0,0 +1,38 @@
gateways:
  tobiko_cloud: # this will use the config in tcloud.yaml for state_connection
    scheduler: # TODO: add the connection info below into the Tobiko Cloud Connections Page with the credentials in plain text for this scheduler
      type: cloud
      default_catalog: DEMO_DB # prod
      connection: # This connection is used for automatic unit test generation and external model creation
        type: redshift
        host: <host>
        user: <user>
        password: {{ env_var('REDSHIFT_PASSWORD') }} # export REDSHIFT_PASSWORD=<your password>
        database: <database>
        port: <port>

default_gateway: tobiko_cloud

model_defaults:
  dialect: redshift
  start: 2024-12-01
  cron: '@daily'

linter:
  enabled: true
  # error if ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"] rules violated
  rules: ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"]
  # warn_rules: ["noselectstar"]
  # ignored_rules: ["noselectstar"]

# enables synchronized deployments to prod when a PR is merged
cicd_bot:
  type: github
  merge_method: squash
  enable_deploy_command: true
  skip_pr_backfill: false
  auto_categorize_changes:
    external: full
    python: full
    sql: full
    seed: full
38 changes: 38 additions & 0 deletions tcloud_config_templates/config_snowflake.yaml
@@ -0,0 +1,38 @@
gateways:
  tobiko_cloud: # this will use the config in tcloud.yaml for state_connection
    scheduler: # TODO: add the connection info below into the Tobiko Cloud Connections Page with the credentials in plain text for this scheduler
      type: cloud
      default_catalog: DEMO_DB # prod
      connection: # This connection is used for automatic unit test generation and external model creation
        type: snowflake
        account: idapznw-wq29399
        user: DEMO_USER
        password: {{ env_var('SNOWFLAKE_PASSWORD') }} # export SNOWFLAKE_PASSWORD=<your password>
        database: DEMO_DB
        warehouse: COMPUTE_WH

default_gateway: tobiko_cloud

model_defaults:
  dialect: snowflake
  start: 2024-12-01
  cron: '@daily'

linter:
  enabled: true
  # error if ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"] rules violated
  rules: ["ambiguousorinvalidcolumn", "invalidselectstarexpansion"]
  # warn_rules: ["noselectstar"]
  # ignored_rules: ["noselectstar"]

# enables synchronized deployments to prod when a PR is merged
cicd_bot:
  type: github
  merge_method: squash
  enable_deploy_command: true
  skip_pr_backfill: false
  auto_categorize_changes:
    external: full
    python: full
    sql: full
    seed: full
7 changes: 7 additions & 0 deletions tcloud_config_templates/tcloud.yaml
@@ -0,0 +1,7 @@
projects:
  prod:
    url: https://cloud.tobikodata.com/sqlmesh/customer_name/project_name/
    gateway: tobiko_cloud
    extras: snowflake,web,github # update for your warehouse
    pip_executable: uv pip
default_project: prod