Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add excel export #22006

Merged
merged 8 commits on Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@ wtforms==2.3.3
# wtforms-json
wtforms-json==0.3.3
# via apache-superset
xlsxwriter==3.0.7
# via apache-superset

# The following packages are considered to be unsafe in a requirements file:
# setuptools
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def get_git_sha() -> str:
"typing-extensions>=4, <5",
"wtforms>=2.3.3, <2.4",
"wtforms-json",
"xlsxwriter>=3.0.7, <3.1",
],
extras_require={
"athena": ["pyathena[pandas]>=2, <3"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ const MENU_KEYS = {
EXPORT_TO_CSV: 'export_to_csv',
EXPORT_TO_CSV_PIVOTED: 'export_to_csv_pivoted',
EXPORT_TO_JSON: 'export_to_json',
EXPORT_TO_XLSX: 'export_to_xlsx',
DOWNLOAD_AS_IMAGE: 'download_as_image',
SHARE_SUBMENU: 'share_submenu',
COPY_PERMALINK: 'copy_permalink',
Expand Down Expand Up @@ -165,6 +166,16 @@ export const useExploreAdditionalActionsMenu = (
[latestQueryFormData],
);

// Export the current chart's result set as an .xlsx download; memoized so the
// menu handler keeps a stable identity until the form data changes.
const exportExcel = useCallback(
  () =>
    exportChart({
      formData: latestQueryFormData,
      resultType: 'results',
      resultFormat: 'xlsx',
    }),
  [latestQueryFormData],
);

const copyLink = useCallback(async () => {
try {
if (!latestQueryFormData) {
Expand Down Expand Up @@ -199,6 +210,11 @@ export const useExploreAdditionalActionsMenu = (
setIsDropdownVisible(false);
setOpenSubmenus([]);

break;
case MENU_KEYS.EXPORT_TO_XLSX:
exportExcel();
setIsDropdownVisible(false);
setOpenSubmenus([]);
break;
case MENU_KEYS.DOWNLOAD_AS_IMAGE:
downloadAsImage(
Expand Down Expand Up @@ -312,6 +328,12 @@ export const useExploreAdditionalActionsMenu = (
>
{t('Download as image')}
</Menu.Item>
<Menu.Item
key={MENU_KEYS.EXPORT_TO_XLSX}
icon={<Icons.FileOutlined css={iconReset} />}
>
{t('Export to Excel')}
</Menu.Item>
</Menu.SubMenu>
<Menu.SubMenu title={t('Share')} key={MENU_KEYS.SHARE_SUBMENU}>
<Menu.Item key={MENU_KEYS.COPY_PERMALINK}>
Expand Down
28 changes: 19 additions & 9 deletions superset/charts/data/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from superset.extensions import event_logger
from superset.utils.async_query_manager import AsyncQueryTokenException
from superset.utils.core import create_zip, get_user_id, json_int_dttm_ser
from superset.views.base import CsvResponse, generate_download_headers
from superset.views.base import CsvResponse, generate_download_headers, XlsxResponse
from superset.views.base_api import statsd_metrics

if TYPE_CHECKING:
Expand Down Expand Up @@ -353,24 +353,34 @@ def _send_chart_response(
if result_type == ChartDataResultType.POST_PROCESSED:
result = apply_post_process(result, form_data, datasource)

if result_format == ChartDataResultFormat.CSV:
# Verify user has permission to export CSV file
if result_format in ChartDataResultFormat.table_like():
# Verify user has permission to export file
if not security_manager.can_access("can_csv", "Superset"):
return self.response_403()

if not result["queries"]:
return self.response_400(_("Empty query result"))

is_csv_format = result_format == ChartDataResultFormat.CSV

if len(result["queries"]) == 1:
# return single query results csv format
# return single query results
data = result["queries"][0]["data"]
return CsvResponse(data, headers=generate_download_headers("csv"))
if is_csv_format:
return CsvResponse(data, headers=generate_download_headers("csv"))

return XlsxResponse(data, headers=generate_download_headers("xlsx"))

# return multi-query results bundled as a zip file
def _process_data(query_data: Any) -> Any:
if result_format == ChartDataResultFormat.CSV:
encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8")
return query_data.encode(encoding)
return query_data

# return multi-query csv results bundled as a zip file
encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8")
files = {
f"query_{idx + 1}.csv": result["data"].encode(encoding)
for idx, result in enumerate(result["queries"])
f"query_{idx + 1}.{result_format}": _process_data(query["data"])
for idx, query in enumerate(result["queries"])
}
return Response(
create_zip(files),
Expand Down
6 changes: 6 additions & 0 deletions superset/common/chart_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from enum import Enum
from typing import Set


class ChartDataResultFormat(str, Enum):
Expand All @@ -24,6 +25,11 @@ class ChartDataResultFormat(str, Enum):

CSV = "csv"
JSON = "json"
XLSX = "xlsx"

@classmethod
def table_like(cls) -> Set["ChartDataResultFormat"]:
    """Return the result formats that produce downloadable tabular files.

    Both CSV and XLSX exports share permission checks and download
    handling, so callers test membership in this set rather than
    comparing against each format individually.
    """
    # A single set literal replaces the needless `{cls.CSV} | {cls.XLSX}`
    # union of two singleton sets — same value, one allocation, clearer.
    return {cls.CSV, cls.XLSX}


class ChartDataResultType(str, Enum):
Expand Down
15 changes: 10 additions & 5 deletions superset/common/query_context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from superset.extensions import cache_manager, security_manager
from superset.models.helpers import QueryResult
from superset.models.sql_lab import Query
from superset.utils import csv
from superset.utils import csv, excel
from superset.utils.cache import generate_cache_key, set_and_log_cache
from superset.utils.core import (
DatasourceType,
Expand Down Expand Up @@ -446,15 +446,20 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme
return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys)

def get_data(self, df: pd.DataFrame) -> Union[str, List[Dict[str, Any]]]:
    """Serialize *df* according to the query context's result format.

    Tabular download formats (CSV/XLSX) are rendered to a string/bytes
    payload suitable for a file download; any other format is returned
    as a list of row dicts.
    """
    result_format = self._query_context.result_format
    if result_format in ChartDataResultFormat.table_like():
        # Exported files should show verbose (human-readable) column
        # names when the datasource defines a verbose_map.
        include_index = not isinstance(df.index, pd.RangeIndex)
        verbose_map = self._qc_datasource.data.get("verbose_map", {})
        if verbose_map:
            df.columns = [
                verbose_map.get(column, column) for column in df.columns
            ]

        if result_format == ChartDataResultFormat.CSV:
            result = csv.df_to_escaped_csv(
                df, index=include_index, **config["CSV_EXPORT"]
            )
        elif result_format == ChartDataResultFormat.XLSX:
            # NOTE(review): EXCEL_EXPORT kwargs are forwarded verbatim to
            # DataFrame.to_excel — confirm `encoding` is still accepted by
            # the pinned pandas version (it was removed in pandas 2.0).
            result = excel.df_to_excel(df, **config["EXCEL_EXPORT"])
        else:
            result = None
        return result or ""

    return df.to_dict(orient="records")
Expand Down
5 changes: 5 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,11 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:
# note: index option should not be overridden
CSV_EXPORT = {"encoding": "utf-8"}

# Excel Options: key/value pairs that will be passed as argument to
# DataFrame.to_excel method.
# note: index option should not be overridden
# NOTE(review): `encoding` was deprecated in pandas 1.5 and removed in
# pandas 2.0 as a DataFrame.to_excel kwarg — confirm the pinned pandas
# version still accepts it before upgrading.
EXCEL_EXPORT = {"encoding": "utf-8"}

# ---------------------------------------------------
# Time grain configurations
# ---------------------------------------------------
Expand Down
29 changes: 29 additions & 0 deletions superset/utils/excel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import io
from typing import Any

import pandas as pd


def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
    """Serialize a DataFrame to the raw bytes of an XLSX workbook.

    Extra keyword arguments are forwarded verbatim to
    ``DataFrame.to_excel`` (e.g. the ``EXCEL_EXPORT`` config options).
    """
    buffer = io.BytesIO()
    # pylint: disable=abstract-class-instantiated
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        df.to_excel(writer, **kwargs)

    return buffer.getvalue()
11 changes: 11 additions & 0 deletions superset/views/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,17 @@ class CsvResponse(Response):
default_mimetype = "text/csv"


class XlsxResponse(Response):
    """Response subclass that serves XLSX downloads with the proper mimetype."""

    # UTF-8 matches the encoding used when the workbook is generated.
    charset = "utf-8"
    # Standard IANA media type for Office Open XML spreadsheets (.xlsx).
    default_mimetype = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )


def bind_field(
_: Any, form: DynamicForm, unbound_field: UnboundField, options: Dict[Any, Any]
) -> Field:
Expand Down
48 changes: 48 additions & 0 deletions tests/integration_tests/charts/data/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,16 @@ def test_empty_request_with_csv_result_format(self):
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 400

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_empty_request_with_excel_result_format(self):
    """
    Chart data API: an empty query list with XLSX result format returns 400
    """
    payload = self.query_context_payload
    payload["result_format"] = "xlsx"
    payload["queries"] = []
    rv = self.post_assert_metric(CHART_DATA_URI, payload, "data")
    assert rv.status_code == 400

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_csv_result_format(self):
"""
Expand All @@ -265,6 +275,17 @@ def test_with_csv_result_format(self):
assert rv.status_code == 200
assert rv.mimetype == "text/csv"

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_excel_result_format(self):
    """
    Chart data API: XLSX result format responds 200 with the xlsx mimetype
    """
    self.query_context_payload["result_format"] = "xlsx"
    rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
    assert rv.status_code == 200
    assert rv.mimetype == (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_multi_query_csv_result_format(self):
"""
Expand All @@ -280,6 +301,21 @@ def test_with_multi_query_csv_result_format(self):
zipfile = ZipFile(BytesIO(rv.data), "r")
assert zipfile.namelist() == ["query_1.csv", "query_2.csv"]

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_multi_query_excel_result_format(self):
    """
    Chart data API: multi-query XLSX result format returns a zip of .xlsx files
    """
    payload = self.query_context_payload
    payload["result_format"] = "xlsx"
    # Duplicate the first query so the response must bundle two files.
    payload["queries"].append(payload["queries"][0])
    rv = self.post_assert_metric(CHART_DATA_URI, payload, "data")
    assert rv.status_code == 200
    assert rv.mimetype == "application/zip"
    with ZipFile(BytesIO(rv.data), "r") as bundle:
        assert bundle.namelist() == ["query_1.xlsx", "query_2.xlsx"]

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_csv_result_format_when_actor_not_permitted_for_csv__403(self):
"""
Expand All @@ -292,6 +328,18 @@ def test_with_csv_result_format_when_actor_not_permitted_for_csv__403(self):
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 403

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_excel_result_format_when_actor_not_permitted_for_excel__403(self):
    """
    Chart data API: XLSX result format returns 403 when the actor
    lacks the export (can_csv) permission
    """
    self.logout()
    self.login(username="gamma_no_csv")
    self.query_context_payload["result_format"] = "xlsx"

    rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
    assert rv.status_code == 403

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_row_limit_and_offset__row_limit_and_offset_were_applied(self):
"""
Expand Down