Skip to content

Commit

Permalink
chore(sqllab): remove deprecated PyArrow API (#24135)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianliebscher committed May 20, 2023
1 parent 66594ad commit 1583090
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 12 deletions.
10 changes: 9 additions & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ geographiclib==1.52
# via geopy
geopy==2.2.0
# via apache-superset
greenlet==2.0.2
# via sqlalchemy
gunicorn==20.1.0
# via apache-superset
hashids==1.3.1
Expand All @@ -134,6 +136,8 @@ humanize==3.11.0
# via apache-superset
idna==3.2
# via email-validator
importlib-metadata==6.6.0
# via flask
importlib-resources==5.12.0
# via limits
isodate==0.6.0
Expand Down Expand Up @@ -209,7 +213,7 @@ prison==0.2.1
# via flask-appbuilder
prompt-toolkit==3.0.38
# via click-repl
pyarrow==10.0.1
pyarrow==12.0.0
# via apache-superset
pycparser==2.20
# via cffi
Expand Down Expand Up @@ -322,6 +326,10 @@ wtforms-json==0.3.5
# via apache-superset
xlsxwriter==3.0.7
# via apache-superset
zipp==3.15.0
# via
# importlib-metadata
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# setuptools
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def get_git_sha() -> str:
"python-dateutil",
"python-dotenv",
"python-geohash",
"pyarrow>=10.0.1, <11",
"pyarrow>=12.0.0, <13",
"pyyaml>=5.4",
"PyJWT>=2.4.0, <3.0",
"redis>=4.5.4, <5.0",
Expand Down
12 changes: 4 additions & 8 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import backoff
import msgpack
import pyarrow as pa
import simplejson as json
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
Expand All @@ -51,6 +50,7 @@
from superset.result_set import SupersetResultSet
from superset.sql_parse import CtasMethod, insert_rls, ParsedQuery
from superset.sqllab.limiting_factor import LimitingFactor
from superset.sqllab.utils import write_ipc_buffer
from superset.utils.celery import session_scope
from superset.utils.core import (
json_iso_dttm_ser,
Expand Down Expand Up @@ -355,12 +355,7 @@ def _serialize_and_expand_data(
with stats_timing(
"sqllab.query.results_backend_pa_serialization", stats_logger
):
data = (
pa.default_serialization_context()
.serialize(result_set.pa_table)
.to_buffer()
.to_pybytes()
)
data = write_ipc_buffer(result_set.pa_table).to_pybytes()

# expand when loading data from results backend
all_columns, expanded_columns = (selected_columns, [])
Expand All @@ -379,7 +374,8 @@ def _serialize_and_expand_data(
return (data, selected_columns, all_columns, expanded_columns)


def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
def execute_sql_statements(
# pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
query_id: int,
rendered_query: str,
return_results: bool,
Expand Down
11 changes: 11 additions & 0 deletions superset/sqllab/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# under the License.
from typing import Any, Dict

import pyarrow as pa

from superset.common.db_query_status import QueryStatus


Expand Down Expand Up @@ -45,3 +47,12 @@ def is_require_to_apply() -> bool:
sql_results["data"] = sql_results["data"][:max_rows_in_result]
sql_results["displayLimitReached"] = True
return sql_results


def write_ipc_buffer(table: pa.Table) -> pa.Buffer:
sink = pa.BufferOutputStream()

with pa.ipc.new_stream(sink, table.schema) as writer:
writer.write_table(table)

return sink.getvalue()
4 changes: 2 additions & 2 deletions superset/views/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
logger = logging.getLogger(__name__)
stats_logger = app.config["STATS_LOGGER"]


REJECTED_FORM_DATA_KEYS: List[str] = []
if not feature_flag_manager.is_feature_enabled("ENABLE_JAVASCRIPT_CONTROLS"):
REJECTED_FORM_DATA_KEYS = ["js_tooltip", "js_onclick_href", "js_data_mutator"]
Expand Down Expand Up @@ -562,7 +561,8 @@ def _deserialize_results_payload(

with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
try:
pa_table = pa.deserialize(ds_payload["data"])
reader = pa.BufferReader(ds_payload["data"])
pa_table = pa.ipc.open_stream(reader).read_all()
except pa.ArrowSerializationError as ex:
raise SerializationError("Unable to deserialize table") from ex

Expand Down

0 comments on commit 1583090

Please sign in to comment.