Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 153de04

Browse files
committed
Merge branch 'main' into shuowei-fix-compiler-syntax-guards
2 parents e4e5347 + 2326ad6 commit 153de04

39 files changed

+9971
-9379
lines changed

.github/workflows/docs.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,3 @@ jobs:
3636
run: |
3737
python -m pip install --upgrade setuptools pip wheel
3838
python -m pip install nox
39-
- name: Run docfx
40-
run: |
41-
nox -s docfx

bigframes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
)
3333
import bigframes.enums as enums # noqa: E402
3434
import bigframes.exceptions as exceptions # noqa: E402
35+
36+
# Register pandas extensions
37+
import bigframes.extensions.pandas.dataframe_accessor # noqa: F401, E402
3538
from bigframes.session import connect, Session # noqa: E402
3639
from bigframes.version import __version__ # noqa: E402
3740

bigframes/bigquery/_operations/sql.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,31 @@
1616

1717
from __future__ import annotations
1818

19-
from typing import Sequence
19+
from typing import cast, Optional, Sequence, Union
2020

2121
import google.cloud.bigquery
2222

2323
from bigframes.core.compile.sqlglot import sql
24+
import bigframes.dataframe
2425
import bigframes.dtypes
2526
import bigframes.operations
2627
import bigframes.series
2728

2829

30+
def _format_names(
    sql_template: str, dataframe: bigframes.dataframe.DataFrame
) -> str:
    """Turn sql_template from a template that uses names to one that uses
    numbers.

    Each ``{column_name}`` placeholder is replaced by the ``{i}`` placeholder
    for that column's position in ``dataframe.columns``. Positional
    placeholders such as ``{0}`` are passed through unchanged, so a template
    may mix both styles.

    Args:
        sql_template (str):
            A format string whose placeholders are column names and/or
            column positions.
        dataframe (bigframes.dataframe.DataFrame):
            The DataFrame whose ``columns`` define the name-to-position
            mapping.

    Returns:
        str:
            The template with every named placeholder rewritten to its
            positional form.
    """
    # One "{i}" placeholder per column, in column order.
    positional = [f"{{{i}}}" for i in range(len(dataframe.columns))]

    # Only string labels can be passed as keyword arguments to str.format();
    # any other label (e.g. an integer) would raise "keywords must be
    # strings". Such columns stay addressable by position.
    named = {
        name: placeholder
        for name, placeholder in zip(dataframe.columns, positional)
        if isinstance(name, str)
    }
    return sql_template.format(*positional, **named)
37+
38+
2939
def sql_scalar(
3040
sql_template: str,
31-
columns: Sequence[bigframes.series.Series],
41+
columns: Union[bigframes.dataframe.DataFrame, Sequence[bigframes.series.Series]],
42+
*,
43+
output_dtype: Optional[bigframes.dtypes.Dtype] = None,
3244
) -> bigframes.series.Series:
3345
"""Create a Series from a SQL template.
3446
@@ -37,6 +49,9 @@ def sql_scalar(
3749
>>> import bigframes.pandas as bpd
3850
>>> import bigframes.bigquery as bbq
3951
52+
Either pass in a sequence of series, in which case use integers in the
53+
format strings.
54+
4055
>>> s = bpd.Series(["1.5", "2.5", "3.5"])
4156
>>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9)))
4257
>>> bbq.sql_scalar("ROUND({0}, 0, 'ROUND_HALF_EVEN')", [s])
@@ -45,13 +60,29 @@ def sql_scalar(
4560
2 4.000000000
4661
dtype: decimal128(38, 9)[pyarrow]
4762
63+
Or pass in a DataFrame, in which case use the column names in the format
64+
strings.
65+
66+
>>> df = bpd.DataFrame({"a": ["1.5", "2.5", "3.5"]})
67+
>>> df = df.astype({"a": pd.ArrowDtype(pa.decimal128(38, 9))})
68+
>>> bbq.sql_scalar("ROUND({a}, 0, 'ROUND_HALF_EVEN')", df)
69+
0 2.000000000
70+
1 2.000000000
71+
2 4.000000000
72+
dtype: decimal128(38, 9)[pyarrow]
73+
4874
Args:
4975
sql_template (str):
5076
A SQL format string with Python-style {0} placeholders for each of
5177
the Series objects in ``columns``.
52-
columns (Sequence[bigframes.pandas.Series]):
78+
columns (
79+
Sequence[bigframes.pandas.Series] | bigframes.pandas.DataFrame
80+
):
5381
Series objects representing the column inputs to the
5482
``sql_template``. Must contain at least one Series.
83+
output_dtype (a BigQuery DataFrames compatible dtype, optional):
84+
If provided, BigQuery DataFrames uses this to determine the output
85+
of the returned Series. This avoids a dry run query.
5586
5687
Returns:
5788
bigframes.pandas.Series:
@@ -60,28 +91,38 @@ def sql_scalar(
6091
Raises:
6192
ValueError: If ``columns`` is empty.
6293
"""
94+
if isinstance(columns, bigframes.dataframe.DataFrame):
95+
sql_template = _format_names(sql_template, columns)
96+
columns = [
97+
cast(bigframes.series.Series, columns[column]) for column in columns.columns
98+
]
99+
63100
if len(columns) == 0:
64101
raise ValueError("Must provide at least one column in columns")
65102

103+
base_series = columns[0]
104+
66105
# To integrate this into our expression trees, we need to get the output
67106
# type, so we do some manual compilation and a dry run query to get that.
68107
# Another benefit of this is that if there is a syntax error in the SQL
69108
# template, then this will fail with an error earlier in the process,
70109
# aiding users in debugging.
71-
literals_sql = [sql.to_sql(sql.literal(None, column.dtype)) for column in columns]
72-
select_sql = sql_template.format(*literals_sql)
73-
dry_run_sql = f"SELECT {select_sql}"
74-
75-
# Use the executor directly, because we want the original column IDs, not
76-
# the user-friendly column names that block.to_sql_query() would produce.
77-
base_series = columns[0]
78-
bqclient = base_series._session.bqclient
79-
job = bqclient.query(
80-
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
81-
)
82-
_, output_type = bigframes.dtypes.convert_schema_field(job.schema[0])
110+
if output_dtype is None:
111+
literals_sql = [
112+
sql.to_sql(sql.literal(None, column.dtype)) for column in columns
113+
]
114+
select_sql = sql_template.format(*literals_sql)
115+
dry_run_sql = f"SELECT {select_sql}"
116+
117+
# Use the executor directly, because we want the original column IDs, not
118+
# the user-friendly column names that block.to_sql_query() would produce.
119+
bqclient = base_series._session.bqclient
120+
job = bqclient.query(
121+
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
122+
)
123+
_, output_dtype = bigframes.dtypes.convert_schema_field(job.schema[0])
83124

84125
op = bigframes.operations.SqlScalarOp(
85-
_output_type=output_type, sql_template=sql_template
126+
_output_type=output_dtype, sql_template=sql_template
86127
)
87128
return base_series._apply_nary_op(op, columns[1:])

bigframes/extensions/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from typing import cast
16+
17+
import pandas
18+
import pandas.api.extensions
19+
20+
import bigframes.core.global_session as bf_session
21+
import bigframes.pandas as bpd
22+
23+
24+
@pandas.api.extensions.register_dataframe_accessor("bigquery")
class BigQueryDataFrameAccessor:
    """
    Accessor exposing BigQuery DataFrames functionality on pandas DataFrames.

    It is registered with pandas under the name ``bigquery``, so it is
    reached as ``df.bigquery`` on a pandas DataFrame.
    """

    def __init__(self, pandas_obj: pandas.DataFrame):
        # The pandas DataFrame this accessor instance is attached to.
        self._obj = pandas_obj

    def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
        """
        Evaluate a SQL scalar expression over this DataFrame's columns.

        The pandas DataFrame is loaded with ``session.read_pandas``, the
        template is applied with ``bigframes.bigquery.sql_scalar``, and the
        result is brought back locally with ``to_pandas(ordered=True)``.

        Args:
            sql_template (str):
                A SQL format string with Python-style placeholders for the
                columns of the DataFrame, e.g. {0}, {1}, etc. (in the order
                they appear in ``df.columns``).
            output_dtype (a BigQuery DataFrames compatible dtype, optional):
                Forwarded unchanged to ``bigframes.bigquery.sql_scalar`` as
                the dtype of the resulting Series.
            session (bigframes.session.Session, optional):
                The session used to load the DataFrame. When omitted, the
                global session is used.

        Returns:
            pandas.Series:
                The values produced by the SQL scalar expression.
        """
        # Deferred to call time: importing bigframes.bigquery at module import
        # would create a circular import.
        import bigframes.bigquery

        active_session = (
            bf_session.get_global_session() if session is None else session
        )

        remote_frame = cast(bpd.DataFrame, active_session.read_pandas(self._obj))
        remote_series = bigframes.bigquery.sql_scalar(
            sql_template,
            remote_frame,
            output_dtype=output_dtype,
        )
        return remote_series.to_pandas(ordered=True)

bigframes/pandas/core/methods/describe.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ def describe(
5656
"max",
5757
]
5858
).intersection(describe_block.column_labels.get_level_values(-1))
59-
describe_block = describe_block.stack(override_labels=stack_cols)
60-
61-
return dataframe.DataFrame(describe_block).droplevel(level=0)
59+
if not stack_cols.empty:
60+
describe_block = describe_block.stack(override_labels=stack_cols)
61+
return dataframe.DataFrame(describe_block).droplevel(level=0)
62+
return dataframe.DataFrame(describe_block)
6263

6364

6465
def _describe(
@@ -120,5 +121,7 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
120121
dtypes.TIME_DTYPE,
121122
]:
122123
return [aggregations.count_op, aggregations.nunique_op]
124+
elif dtypes.is_json_like(dtype) or dtype == dtypes.OBJ_REF_DTYPE:
125+
return [aggregations.count_op]
123126
else:
124127
return []

docs/conf.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,12 @@
5959
"sphinx.ext.todo",
6060
"sphinx.ext.viewcode",
6161
"sphinx_sitemap",
62-
"myst_parser",
62+
"myst_nb",
6363
]
6464

65+
# myst-nb configuration
66+
nb_execution_mode = "off"
67+
6568
# autodoc/autosummary flags
6669
autoclass_content = "both"
6770
autodoc_default_options = {"members": True}
@@ -269,12 +272,16 @@
269272

270273

271274
suppress_warnings = [
275+
# Allow unknown mimetype so we can use widgets in tutorial notebooks.
276+
"mystnb.unknown_mime_type",
272277
# Temporarily suppress this to avoid "more than one target found for
273278
# cross-reference" warning, which are intractable for us to avoid while in
274279
# a mono-repo.
275280
# See https://github.com/sphinx-doc/sphinx/blob
276281
# /2a65ffeef5c107c19084fabdd706cdff3f52d93c/sphinx/domains/python.py#L843
277-
"ref.python"
282+
"ref.python",
283+
# Allow external websites to be down occasionally.
284+
"intersphinx.external",
278285
]
279286

280287
# -- Options for LaTeX output ---------------------------------------------
@@ -383,7 +390,8 @@
383390
"grpc": ("https://grpc.github.io/grpc/python/", None),
384391
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
385392
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
386-
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
393+
# TODO(tswast): re-enable if we can get temporary failures to be ignored.
394+
# "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
387395
"pydata-google-auth": (
388396
"https://pydata-google-auth.readthedocs.io/en/latest/",
389397
None,

docs/notebooks

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../notebooks

docs/reference/index.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@ packages.
1919
bigframes.pandas.api.typing
2020
bigframes.streaming
2121

22+
Pandas Extensions
23+
~~~~~~~~~~~~~~~~~
24+
25+
BigQuery DataFrames provides extensions to pandas DataFrame objects.
26+
27+
.. autosummary::
28+
:toctree: api
29+
30+
bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor
31+
2232
ML APIs
2333
~~~~~~~
2434

0 commit comments

Comments
 (0)