346 changes: 0 additions & 346 deletions bigquery/docs/snippets.py
@@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete):
assert table.time_partitioning.expiration_ms == 7776000000


def test_load_and_query_partitioned_table(client, to_delete):
dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
client.create_dataset(dataset)
to_delete.append(dataset)

# [START bigquery_load_table_partitioned]
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
bigquery.SchemaField("date", "DATE"),
]
job_config.skip_leading_rows = 1
job_config.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY,
field="date", # name of column to use for partitioning
expiration_ms=7776000000,
) # 90 days
uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

load_job = client.load_table_from_uri(
uri, dataset_ref.table(table_id), job_config=job_config
) # API request

assert load_job.job_type == "load"

load_job.result() # Waits for table load to complete.

table = client.get_table(dataset_ref.table(table_id))
print("Loaded {} rows to table {}".format(table.num_rows, table_id))
# [END bigquery_load_table_partitioned]
assert table.num_rows == 50

project_id = client.project

# [START bigquery_query_partitioned_table]
import datetime

# from google.cloud import bigquery
# client = bigquery.Client()
# project_id = 'my-project'
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

sql_template = """
SELECT *
FROM `{}.{}.{}`
WHERE date BETWEEN @start_date AND @end_date
"""
sql = sql_template.format(project_id, dataset_id, table_id)
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
]

# API request
query_job = client.query(sql, job_config=job_config)

rows = list(query_job)
print("{} states were admitted to the US in the 1800s".format(len(rows)))
# [END bigquery_query_partitioned_table]
assert len(rows) == 29


@pytest.mark.skip(
reason=(
"update_table() is flaky "
@@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete):
to_delete.insert(0, blob)


def test_client_query_legacy_sql(client):
"""Run a query with Legacy SQL explicitly set"""
# [START bigquery_query_legacy]
# from google.cloud import bigquery
# client = bigquery.Client()

query = (
"SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
'WHERE state = "TX" '
"LIMIT 100"
)

# Set use_legacy_sql to True to use legacy SQL syntax.
job_config = bigquery.QueryJobConfig()
job_config.use_legacy_sql = True

query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_legacy]


def test_client_query_total_rows(client, capsys):
"""Run a query and just check for how many rows."""
# [START bigquery_query_total_rows]
@@ -1420,251 +1319,6 @@ def test_manage_job(client):
# [END bigquery_get_job]


def test_client_query_w_named_params(client, capsys):
"""Run a query using named query parameters"""

# [START bigquery_query_params_named]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = @corpus
AND word_count >= @min_word_count
ORDER BY word_count DESC;
"""
query_params = [
bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_named]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_positional_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_positional]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = ?
AND word_count >= ?
ORDER BY word_count DESC;
"""
# Set the name to None to use positional parameters.
# Note that you cannot mix named and positional parameters.
query_params = [
bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter(None, "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_positional]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_timestamp_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_timestamps]
# from google.cloud import bigquery
# client = bigquery.Client()

import datetime
import pytz

query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);"
query_params = [
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row)

assert query_job.state == "DONE"
# [END bigquery_query_params_timestamps]

out, _ = capsys.readouterr()
assert "2016, 12, 7, 9, 0" in out


def test_client_query_w_array_params(client, capsys):
"""Run a query using array query parameters"""
# [START bigquery_query_params_arrays]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT name, sum(number) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE gender = @gender
AND state IN UNNEST(@states)
GROUP BY name
ORDER BY count DESC
LIMIT 10;
"""
query_params = [
bigquery.ScalarQueryParameter("gender", "STRING", "M"),
bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.name, row.count))

assert query_job.state == "DONE"
# [END bigquery_query_params_arrays]

out, _ = capsys.readouterr()
assert "James" in out


def test_client_query_w_struct_params(client, capsys):
"""Run a query using struct query parameters"""
# [START bigquery_query_params_structs]
# from google.cloud import bigquery
# client = bigquery.Client()

query = "SELECT @struct_value AS s;"
query_params = [
bigquery.StructQueryParameter(
"struct_value",
bigquery.ScalarQueryParameter("x", "INT64", 1),
bigquery.ScalarQueryParameter("y", "STRING", "foo"),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row.s)

assert query_job.state == "DONE"
# [END bigquery_query_params_structs]

out, _ = capsys.readouterr()
assert "1" in out
assert "foo" in out


def test_query_no_cache(client):
# [START bigquery_query_no_cache]
# from google.cloud import bigquery
# client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
sql = """
SELECT corpus
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
query_job = client.query(
sql,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_no_cache]


def test_query_external_gcs_temporary_table(client):
# [START bigquery_query_external_gcs_temp]
# from google.cloud import bigquery
# client = bigquery.Client()

# Configure the external data source and query job
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [
"gs://cloud-samples-data/bigquery/us-states/us-states.csv"
]
external_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
]
external_config.options.skip_leading_rows = 1 # optionally skip header row
table_id = "us_states"
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = {table_id: external_config}

# Example query to find states starting with 'W'
sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

query_job = client.query(sql, job_config=job_config) # API request

w_states = list(query_job) # Waits for query to finish
print("There are {} states with names starting with W.".format(len(w_states)))
# [END bigquery_query_external_gcs_temp]
assert len(w_states) == 4


def test_query_external_gcs_permanent_table(client, to_delete):
dataset_id = "query_external_gcs_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
2 changes: 1 addition & 1 deletion bigquery/docs/usage/queries.rst
@@ -43,7 +43,7 @@ Run a query using a named query parameter
See BigQuery documentation for more information on
`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/client_query_w_named_params.py
:language: python
:dedent: 4
:start-after: [START bigquery_query_params_named]
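
The named-parameter documentation now pulls its example from a standalone sample module instead of snippets.py. Below is a minimal sketch of what samples/client_query_w_named_params.py presumably contains, mirroring the deleted test_client_query_w_named_params snippet above; the module-level import and wrapper function name are assumptions, while the query, parameters, and job configuration come from the removed code.

    from google.cloud import bigquery


    def client_query_w_named_params(client):
        # [START bigquery_query_params_named]
        # from google.cloud import bigquery
        # client = bigquery.Client()

        query = """
            SELECT word, word_count
            FROM `bigquery-public-data.samples.shakespeare`
            WHERE corpus = @corpus
            AND word_count >= @min_word_count
            ORDER BY word_count DESC;
        """
        query_params = [
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
        job_config = bigquery.QueryJobConfig()
        job_config.query_parameters = query_params
        query_job = client.query(
            query,
            # Location must match that of the dataset(s) referenced in the query.
            location="US",
            job_config=job_config,
        )  # API request - starts the query

        # Print the results.
        for row in query_job:
            print("{}: \t{}".format(row.word, row.word_count))
        # [END bigquery_query_params_named]

The :dedent: 4 option in the directive strips the function-body indentation from the region between the [START] and [END] markers, so the rendered documentation is unaffected by the move into a wrapper function.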