Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 6047cc0

Browse files
Merge remote-tracking branch 'github/main' into async-uploads-executor
2 parents f17c2d7 + e8c4603 commit 6047cc0

File tree

8 files changed

+117
-46
lines changed

8 files changed

+117
-46
lines changed

bigframes/core/blocks.py

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,9 +1822,9 @@ def melt(
18221822
Arguments correspond to pandas.melt arguments.
18231823
"""
18241824
# TODO: Implement col_level and ignore_index
1825-
value_labels: pd.Index = pd.Index(
1826-
[self.col_id_to_label[col_id] for col_id in value_vars]
1827-
)
1825+
value_labels: pd.Index = self.column_labels[
1826+
[self.value_columns.index(col_id) for col_id in value_vars]
1827+
]
18281828
id_labels = [self.col_id_to_label[col_id] for col_id in id_vars]
18291829

18301830
unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot(
@@ -3417,6 +3417,7 @@ def unpivot(
34173417
joined_array, (labels_mapping, column_mapping) = labels_array.relational_join(
34183418
array_value, type="cross"
34193419
)
3420+
34203421
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
34213422
# Last column is offsets
34223423
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
@@ -3426,20 +3427,24 @@ def unpivot(
34263427
unpivot_exprs: List[ex.Expression] = []
34273428
# Supports producing multiple stacked output columns for stacking only part of hierarchical index
34283429
for input_ids in unpivot_columns:
3429-
# row explode offset used to choose the input column
3430-
# we use offset instead of label as labels are not necessarily unique
3431-
cases = itertools.chain(
3432-
*(
3433-
(
3434-
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3435-
ex.deref(column_mapping[id_or_null])
3436-
if (id_or_null is not None)
3437-
else ex.const(None),
3430+
col_expr: ex.Expression
3431+
if not input_ids:
3432+
col_expr = ex.const(None, dtype=bigframes.dtypes.INT_DTYPE)
3433+
else:
3434+
# row explode offset used to choose the input column
3435+
# we use offset instead of label as labels are not necessarily unique
3436+
cases = itertools.chain(
3437+
*(
3438+
(
3439+
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3440+
ex.deref(column_mapping[id_or_null])
3441+
if (id_or_null is not None)
3442+
else ex.const(None),
3443+
)
3444+
for i, id_or_null in enumerate(input_ids)
34383445
)
3439-
for i, id_or_null in enumerate(input_ids)
34403446
)
3441-
)
3442-
col_expr = ops.case_when_op.as_expr(*cases)
3447+
col_expr = ops.case_when_op.as_expr(*cases)
34433448
unpivot_exprs.append(col_expr)
34443449

34453450
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
@@ -3457,19 +3462,43 @@ def _pd_index_to_array_value(
34573462
Create an ArrayValue from a list of label tuples.
34583463
The last column will be row offsets.
34593464
"""
3465+
id_gen = bigframes.core.identifiers.standard_id_strings()
3466+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3467+
offset_id = next(id_gen)
3468+
34603469
rows = []
34613470
labels_as_tuples = utils.index_as_tuples(index)
34623471
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643472
row_label = labels_as_tuples[row_offset]
34653473
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663474
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3475+
for label_part, col_id in zip(row_label, col_ids):
3476+
row[col_id] = label_part if pd.notnull(label_part) else None
3477+
row[offset_id] = row_offset
34703478
rows.append(row)
34713479

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3480+
if not rows:
3481+
dtypes_list = getattr(index, "dtypes", None)
3482+
if dtypes_list is None:
3483+
dtypes_list = (
3484+
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
3485+
)
3486+
3487+
fields = []
3488+
for col_id, dtype in zip(col_ids, dtypes_list):
3489+
try:
3490+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3491+
except Exception:
3492+
pa_type = pa.string()
3493+
fields.append(pa.field(col_id, pa_type))
3494+
fields.append(pa.field(offset_id, pa.int64()))
3495+
schema = pa.schema(fields)
3496+
pt = pa.Table.from_pylist([], schema=schema)
3497+
else:
3498+
pt = pa.Table.from_pylist(rows)
3499+
pt = pt.rename_columns([*col_ids, offset_id])
3500+
3501+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733502

34743503

34753504
def _resolve_index_col(

bigframes/extensions/pandas/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,15 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""
16+
BigQuery DataFrames automatically registers a pandas extension when imported.
17+
This allows you to use the power of the BigQuery engine with pandas objects
18+
directly.
19+
"""
20+
21+
from bigframes.extensions.pandas.dataframe_accessor import (
22+
PandasBigQueryDataFrameAccessor,
23+
)
24+
25+
__all__ = ["PandasBigQueryDataFrameAccessor"]

bigframes/extensions/pandas/dataframe_accessor.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
import pandas.api.extensions
1919

2020
import bigframes.core.global_session as bf_session
21+
from bigframes.core.logging import log_adapter
2122
import bigframes.pandas as bpd
2223

2324

24-
class AIAccessor:
25+
@log_adapter.class_logger
26+
class PandasAIAccessor:
2527
"""
2628
Pandas DataFrame accessor for BigQuery AI functions.
2729
"""
@@ -101,7 +103,8 @@ def forecast(
101103

102104

103105
@pandas.api.extensions.register_dataframe_accessor("bigquery")
104-
class BigQueryDataFrameAccessor:
106+
@log_adapter.class_logger
107+
class PandasBigQueryDataFrameAccessor:
105108
"""
106109
Pandas DataFrame accessor for BigQuery DataFrames functionality.
107110
@@ -112,11 +115,11 @@ def __init__(self, pandas_obj: pandas.DataFrame):
112115
self._obj = pandas_obj
113116

114117
@property
115-
def ai(self) -> "AIAccessor":
118+
def ai(self) -> "PandasAIAccessor":
116119
"""
117120
Accessor for BigQuery AI functions.
118121
"""
119-
return AIAccessor(self._obj)
122+
return PandasAIAccessor(self._obj)
120123

121124
def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
122125
"""

docs/reference/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ BigQuery DataFrames provides extensions to pandas DataFrame objects.
2727
.. autosummary::
2828
:toctree: api
2929

30-
bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor
30+
bigframes.extensions.pandas
3131

3232
ML APIs
3333
~~~~~~~

tests/system/large/functions/test_remote_function.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,25 +2682,6 @@ def square(x: int) -> int:
26822682
)
26832683

26842684

2685-
@pytest.mark.flaky(retries=2, delay=120)
2686-
def test_remote_function_ingress_settings_w_all(session):
2687-
ingress_settings_args = {"cloud_function_ingress_settings": "all"}
2688-
2689-
with pytest.raises(
2690-
google.api_core.exceptions.FailedPrecondition,
2691-
match="400.*allowedIngress violated",
2692-
):
2693-
2694-
def square(x: int) -> int:
2695-
return x * x
2696-
2697-
session.remote_function(
2698-
reuse=False,
2699-
cloud_function_service_account="default",
2700-
**ingress_settings_args,
2701-
)(square)
2702-
2703-
27042685
@pytest.mark.flaky(retries=2, delay=120)
27052686
def test_remote_function_ingress_settings_unsupported(session):
27062687
with pytest.raises(

tests/system/small/test_dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5902,6 +5902,19 @@ def test_to_gbq_table_labels(scalars_df_index):
59025902
assert table.labels["test"] == "labels"
59035903

59045904

5905+
def test_to_gbq_obj_ref_persists(session):
5906+
# Test that saving and loading an Object Reference retains its dtype
5907+
bdf = session.from_glob_path(
5908+
"gs://cloud-samples-data/vision/ocr/*.jpg", name="uris"
5909+
).head(1)
5910+
5911+
destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence"
5912+
bdf.to_gbq(destination_table, if_exists="replace")
5913+
5914+
loaded_df = session.read_gbq(destination_table)
5915+
assert loaded_df["uris"].dtype == dtypes.OBJ_REF_DTYPE
5916+
5917+
59055918
@pytest.mark.parametrize(
59065919
("col_names", "ignore_index"),
59075920
[

tests/system/small/test_multiindex.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,3 +1490,34 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
14901490
bigframes.testing.utils.assert_index_equal(
14911491
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
14921492
)
1493+
1494+
1495+
def test_count_empty_multiindex_columns(session):
1496+
df = pandas.DataFrame(
1497+
[], index=[1, 2], columns=pandas.MultiIndex.from_tuples([], names=["a", "b"])
1498+
)
1499+
bdf = session.read_pandas(df)
1500+
1501+
# count() operation unpivots columns, triggering the empty MultiIndex bug internally
1502+
count_df = bdf.count()
1503+
1504+
# The local fix ensures that empty unpivoted columns generate properly typed NULLs
1505+
# rather than failing syntax validation downstream in BigQuery.
1506+
# We compile to `.sql` to verify it succeeds locally without evaluating on BigQuery natively.
1507+
_ = count_df.to_frame().sql
1508+
1509+
# Assert structural layout is correct
1510+
assert count_df.index.nlevels == 2
1511+
assert list(count_df.index.names) == ["a", "b"]
1512+
1513+
1514+
def test_dataframe_melt_multiindex(session):
1515+
# Tests that `melt` operations via count do not cause MultiIndex drops in Arrow
1516+
df = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
1517+
df.columns = pandas.MultiIndex.from_tuples(
1518+
[("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
1519+
)
1520+
bdf = session.read_pandas(df)
1521+
1522+
count_df = bdf.count().to_pandas()
1523+
assert count_df.shape[0] == 3

tests/unit/extensions/pandas/test_registration.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def test_bigframes_import_registers_accessor():
2222
df = pd.DataFrame({"a": [1]})
2323
# If bigframes was imported, df.bigquery should exist
2424
assert hasattr(df, "bigquery")
25-
from bigframes.extensions.pandas.dataframe_accessor import BigQueryDataFrameAccessor
25+
from bigframes.extensions.pandas.dataframe_accessor import (
26+
PandasBigQueryDataFrameAccessor,
27+
)
2628

27-
assert isinstance(df.bigquery, BigQueryDataFrameAccessor)
29+
assert isinstance(df.bigquery, PandasBigQueryDataFrameAccessor)

0 commit comments

Comments
 (0)