From 1583090623c5083de504ad1e1710b894f0dc7f67 Mon Sep 17 00:00:00 2001 From: Sebastian Liebscher <112352529+sebastianliebscher@users.noreply.github.com> Date: Sat, 20 May 2023 20:57:18 +0200 Subject: [PATCH] chore(sqllab): remove deprecated PyArrow API (#24135) --- requirements/base.txt | 10 +++++++++- setup.py | 2 +- superset/sql_lab.py | 12 ++++-------- superset/sqllab/utils.py | 11 +++++++++++ superset/views/utils.py | 4 ++-- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 93721bf232ea..4538977c738c 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -122,6 +122,8 @@ geographiclib==1.52 # via geopy geopy==2.2.0 # via apache-superset +greenlet==2.0.2 + # via sqlalchemy gunicorn==20.1.0 # via apache-superset hashids==1.3.1 @@ -134,6 +136,8 @@ humanize==3.11.0 # via apache-superset idna==3.2 # via email-validator +importlib-metadata==6.6.0 + # via flask importlib-resources==5.12.0 # via limits isodate==0.6.0 @@ -209,7 +213,7 @@ prison==0.2.1 # via flask-appbuilder prompt-toolkit==3.0.38 # via click-repl -pyarrow==10.0.1 +pyarrow==12.0.0 # via apache-superset pycparser==2.20 # via cffi @@ -322,6 +326,10 @@ wtforms-json==0.3.5 # via apache-superset xlsxwriter==3.0.7 # via apache-superset +zipp==3.15.0 + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/setup.py b/setup.py index d59f0c6496a0..09fde6b6940b 100644 --- a/setup.py +++ b/setup.py @@ -111,7 +111,7 @@ def get_git_sha() -> str: "python-dateutil", "python-dotenv", "python-geohash", - "pyarrow>=10.0.1, <11", + "pyarrow>=12.0.0, <13", "pyyaml>=5.4", "PyJWT>=2.4.0, <3.0", "redis>=4.5.4, <5.0", diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 5cb52d4d1cc3..9ea881fadf95 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -24,7 +24,6 @@ import backoff import msgpack -import pyarrow as pa import simplejson as json from celery import Task from celery.exceptions import SoftTimeLimitExceeded @@ -51,6 +50,7 @@ from superset.result_set import SupersetResultSet from superset.sql_parse import CtasMethod, insert_rls, ParsedQuery from superset.sqllab.limiting_factor import LimitingFactor +from superset.sqllab.utils import write_ipc_buffer from superset.utils.celery import session_scope from superset.utils.core import ( json_iso_dttm_ser, @@ -355,12 +355,7 @@ def _serialize_and_expand_data( with stats_timing( "sqllab.query.results_backend_pa_serialization", stats_logger ): - data = ( - pa.default_serialization_context() - .serialize(result_set.pa_table) - .to_buffer() - .to_pybytes() - ) + data = write_ipc_buffer(result_set.pa_table).to_pybytes() # expand when loading data from results backend all_columns, expanded_columns = (selected_columns, []) @@ -379,7 +374,8 @@ def _serialize_and_expand_data( return (data, selected_columns, all_columns, expanded_columns) -def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches +def execute_sql_statements( + # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches query_id: int, rendered_query: str, return_results: bool, diff --git a/superset/sqllab/utils.py b/superset/sqllab/utils.py index 8181b5bd29b2..3bcd7308a128 100644 --- a/superset/sqllab/utils.py +++ b/superset/sqllab/utils.py @@ -16,6 +16,8 @@ # under the License. from typing import Any, Dict +import pyarrow as pa + from superset.common.db_query_status import QueryStatus @@ -45,3 +47,12 @@ def is_require_to_apply() -> bool: sql_results["data"] = sql_results["data"][:max_rows_in_result] sql_results["displayLimitReached"] = True return sql_results + + +def write_ipc_buffer(table: pa.Table) -> pa.Buffer: + sink = pa.BufferOutputStream() + + with pa.ipc.new_stream(sink, table.schema) as writer: + writer.write_table(table) + + return sink.getvalue() diff --git a/superset/views/utils.py b/superset/views/utils.py index a53e7500406f..a366ac683c20 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -55,7 +55,6 @@ logger = logging.getLogger(__name__) stats_logger = app.config["STATS_LOGGER"] - REJECTED_FORM_DATA_KEYS: List[str] = [] if not feature_flag_manager.is_feature_enabled("ENABLE_JAVASCRIPT_CONTROLS"): REJECTED_FORM_DATA_KEYS = ["js_tooltip", "js_onclick_href", "js_data_mutator"] @@ -562,7 +561,8 @@ def _deserialize_results_payload( with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger): try: - pa_table = pa.deserialize(ds_payload["data"]) + reader = pa.BufferReader(ds_payload["data"]) + pa_table = pa.ipc.open_stream(reader).read_all() except pa.ArrowSerializationError as ex: raise SerializationError("Unable to deserialize table") from ex