Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 6047cc0

Browse files
Merge remote-tracking branch 'github/main' into async-uploads-executor
2 parents f17c2d7 + e8c4603 commit 6047cc0

File tree

8 files changed

+117
-46
lines changed

8 files changed

+117
-46
lines changed

bigframes/core/blocks.py

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,9 +1822,9 @@ def melt(
18221822
Arguments correspond to pandas.melt arguments.
18231823
"""
18241824
# TODO: Implement col_level and ignore_index
1825-
value_labels: pd.Index = pd.Index(
1826-
[self.col_id_to_label[col_id] for col_id in value_vars]
1827-
)
1825+
value_labels: pd.Index = self.column_labels[
1826+
[self.value_columns.index(col_id) for col_id in value_vars]
1827+
]
18281828
id_labels = [self.col_id_to_label[col_id] for col_id in id_vars]
18291829

18301830
unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot(
@@ -3417,6 +3417,7 @@ def unpivot(
34173417
joined_array, (labels_mapping, column_mapping) = labels_array.relational_join(
34183418
array_value, type="cross"
34193419
)
3420+
34203421
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
34213422
# Last column is offsets
34223423
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
@@ -3426,20 +3427,24 @@ def unpivot(
34263427
unpivot_exprs: List[ex.Expression] = []
34273428
# Supports producing multiple stacked output columns for stacking only part of hierarchical index
34283429
for input_ids in unpivot_columns:
3429-
# row explode offset used to choose the input column
3430-
# we use offset instead of label as labels are not necessarily unique
3431-
cases = itertools.chain(
3432-
*(
3433-
(
3434-
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3435-
ex.deref(column_mapping[id_or_null])
3436-
if (id_or_null is not None)
3437-
else ex.const(None),
3430+
col_expr: ex.Expression
3431+
if not input_ids:
3432+
col_expr = ex.const(None, dtype=bigframes.dtypes.INT_DTYPE)
3433+
else:
3434+
# row explode offset used to choose the input column
3435+
# we use offset instead of label as labels are not necessarily unique
3436+
cases = itertools.chain(
3437+
*(
3438+
(
3439+
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3440+
ex.deref(column_mapping[id_or_null])
3441+
if (id_or_null is not None)
3442+
else ex.const(None),
3443+
)
3444+
for i, id_or_null in enumerate(input_ids)
34383445
)
3439-
for i, id_or_null in enumerate(input_ids)
34403446
)
3441-
)
3442-
col_expr = ops.case_when_op.as_expr(*cases)
3447+
col_expr = ops.case_when_op.as_expr(*cases)
34433448
unpivot_exprs.append(col_expr)
34443449

34453450
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
@@ -3457,19 +3462,43 @@ def _pd_index_to_array_value(
34573462
Create an ArrayValue from a list of label tuples.
34583463
The last column will be row offsets.
34593464
"""
3465+
id_gen = bigframes.core.identifiers.standard_id_strings()
3466+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3467+
offset_id = next(id_gen)
3468+
34603469
rows = []
34613470
labels_as_tuples = utils.index_as_tuples(index)
34623471
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643472
row_label = labels_as_tuples[row_offset]
34653473
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663474
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3475+
for label_part, col_id in zip(row_label, col_ids):
3476+
row[col_id] = label_part if pd.notnull(label_part) else None
3477+
row[offset_id] = row_offset
34703478
rows.append(row)
34713479

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3480+
if not rows:
3481+
dtypes_list = getattr(index, "dtypes", None)
3482+
if dtypes_list is None:
3483+
dtypes_list = (
3484+
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
3485+
)
3486+
3487+
fields = []
3488+
for col_id, dtype in zip(col_ids, dtypes_list):
3489+
try:
3490+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3491+
except Exception:
3492+
pa_type = pa.string()
3493+
fields.append(pa.field(col_id, pa_type))
3494+
fields.append(pa.field(offset_id, pa.int64()))
3495+
schema = pa.schema(fields)
3496+
pt = pa.Table.from_pylist([], schema=schema)
3497+
else:
3498+
pt = pa.Table.from_pylist(rows)
3499+
pt = pt.rename_columns([*col_ids, offset_id])
3500+
3501+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733502

34743503

34753504
def _resolve_index_col(

bigframes/extensions/pandas/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,15 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""
16+
BigQuery DataFrames automatically registers a pandas extension when imported.
17+
This allows you to use the power of the BigQuery engine with pandas objects
18+
directly.
19+
"""
20+
21+
from bigframes.extensions.pandas.dataframe_accessor import (
22+
PandasBigQueryDataFrameAccessor,
23+
)
24+
25+
__all__ = ["PandasBigQueryDataFrameAccessor"]

bigframes/extensions/pandas/dataframe_accessor.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
import pandas.api.extensions
1919

2020
import bigframes.core.global_session as bf_session
21+
from bigframes.core.logging import log_adapter
2122
import bigframes.pandas as bpd
2223

2324

24-
class AIAccessor:
25+
@log_adapter.class_logger
26+
class PandasAIAccessor:
2527
"""
2628
Pandas DataFrame accessor for BigQuery AI functions.
2729
"""
@@ -101,7 +103,8 @@ def forecast(
101103

102104

103105
@pandas.api.extensions.register_dataframe_accessor("bigquery")
104-
class BigQueryDataFrameAccessor:
106+
@log_adapter.class_logger
107+
class PandasBigQueryDataFrameAccessor:
105108
"""
106109
Pandas DataFrame accessor for BigQuery DataFrames functionality.
107110
@@ -112,11 +115,11 @@ def __init__(self, pandas_obj: pandas.DataFrame):
112115
self._obj = pandas_obj
113116

114117
@property
115-
def ai(self) -> "AIAccessor":
118+
def ai(self) -> "PandasAIAccessor":
116119
"""
117120
Accessor for BigQuery AI functions.
118121
"""
119-
return AIAccessor(self._obj)
122+
return PandasAIAccessor(self._obj)
120123

121124
def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
122125
"""

docs/reference/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ BigQuery DataFrames provides extensions to pandas DataFrame objects.
2727
.. autosummary::
2828
:toctree: api
2929

30-
bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor
30+
bigframes.extensions.pandas
3131

3232
ML APIs
3333
~~~~~~~

tests/system/large/functions/test_remote_function.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,25 +2682,6 @@ def square(x: int) -> int:
26822682
)
26832683

26842684

2685-
@pytest.mark.flaky(retries=2, delay=120)
2686-
def test_remote_function_ingress_settings_w_all(session):
2687-
ingress_settings_args = {"cloud_function_ingress_settings": "all"}
2688-
2689-
with pytest.raises(
2690-
google.api_core.exceptions.FailedPrecondition,
2691-
match="400.*allowedIngress violated",
2692-
):
2693-
2694-
def square(x: int) -> int:
2695-
return x * x
2696-
2697-
session.remote_function(
2698-
reuse=False,
2699-
cloud_function_service_account="default",
2700-
**ingress_settings_args,
2701-
)(square)
2702-
2703-
27042685
@pytest.mark.flaky(retries=2, delay=120)
27052686
def test_remote_function_ingress_settings_unsupported(session):
27062687
with pytest.raises(

tests/system/small/test_dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5902,6 +5902,19 @@ def test_to_gbq_table_labels(scalars_df_index):
59025902
assert table.labels["test"] == "labels"
59035903

59045904

5905+
def test_to_gbq_obj_ref_persists(session):
5906+
# Test that saving and loading an Object Reference retains its dtype
5907+
bdf = session.from_glob_path(
5908+
"gs://cloud-samples-data/vision/ocr/*.jpg", name="uris"
5909+
).head(1)
5910+
5911+
destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence"
5912+
bdf.to_gbq(destination_table, if_exists="replace")
5913+
5914+
loaded_df = session.read_gbq(destination_table)
5915+
assert loaded_df["uris"].dtype == dtypes.OBJ_REF_DTYPE
5916+
5917+
59055918
@pytest.mark.parametrize(
59065919
("col_names", "ignore_index"),
59075920
[

tests/system/small/test_multiindex.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,3 +1490,34 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
14901490
bigframes.testing.utils.assert_index_equal(
14911491
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
14921492
)
1493+
1494+
1495+
def test_count_empty_multiindex_columns(session):
1496+
df = pandas.DataFrame(
1497+
[], index=[1, 2], columns=pandas.MultiIndex.from_tuples([], names=["a", "b"])
1498+
)
1499+
bdf = session.read_pandas(df)
1500+
1501+
# count() operation unpivots columns, triggering the empty MultiIndex bug internally
1502+
count_df = bdf.count()
1503+
1504+
# The local fix ensures that empty unpivoted columns generate properly typed NULLs
1505+
# rather than failing syntax validation downstream in BigQuery.
1506+
# We compile to `.sql` to verify it succeeds locally without evaluating on BigQuery natively.
1507+
_ = count_df.to_frame().sql
1508+
1509+
# Assert structural layout is correct
1510+
assert count_df.index.nlevels == 2
1511+
assert list(count_df.index.names) == ["a", "b"]
1512+
1513+
1514+
def test_dataframe_melt_multiindex(session):
1515+
# Tests that `melt` operations via count do not cause MultiIndex drops in Arrow
1516+
df = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
1517+
df.columns = pandas.MultiIndex.from_tuples(
1518+
[("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
1519+
)
1520+
bdf = session.read_pandas(df)
1521+
1522+
count_df = bdf.count().to_pandas()
1523+
assert count_df.shape[0] == 3

tests/unit/extensions/pandas/test_registration.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def test_bigframes_import_registers_accessor():
2222
df = pd.DataFrame({"a": [1]})
2323
# If bigframes was imported, df.bigquery should exist
2424
assert hasattr(df, "bigquery")
25-
from bigframes.extensions.pandas.dataframe_accessor import BigQueryDataFrameAccessor
25+
from bigframes.extensions.pandas.dataframe_accessor import (
26+
PandasBigQueryDataFrameAccessor,
27+
)
2628

27-
assert isinstance(df.bigquery, BigQueryDataFrameAccessor)
29+
assert isinstance(df.bigquery, PandasBigQueryDataFrameAccessor)

0 commit comments

Comments
 (0)