feat: Add excel export #22006

Merged · 8 commits · Jan 27, 2023
Changes from 2 commits
2 changes: 2 additions & 0 deletions setup.py
@@ -173,6 +173,8 @@ def get_git_sha() -> str:
"thumbnails": ["Pillow>=9.1.1, <10.0.0"],
"vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"],
"netezza": ["nzalchemy>=11.0.2"],
"xls": ["xlwt>=1.3.0, < 1.4"],
"xlsx": ["xlsxwriter>=3.0.0, < 3.1"],
},
python_requires="~=3.8",
author="Apache Software Foundation",
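The two extras above declare the optional writer engines pandas needs for each format. A quick way to confirm they are present in an environment — assuming the extras were installed, e.g. with `pip install "apache-superset[xls,xlsx]"` — is a small check like this sketch:

```python
# Sketch: verify the optional Excel engines declared in setup.py are importable.
# Assumes the extras were installed (package name and extras syntax are the usual ones).
import importlib.util

ENGINES = {"xls": "xlwt", "xlsx": "xlsxwriter"}

for fmt, module in ENGINES.items():
    available = importlib.util.find_spec(module) is not None
    print(f"{fmt}: {module} {'found' if available else 'missing'}")
```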
@@ -41,6 +41,8 @@ const MENU_KEYS = {
EXPORT_TO_CSV: 'export_to_csv',
EXPORT_TO_CSV_PIVOTED: 'export_to_csv_pivoted',
EXPORT_TO_JSON: 'export_to_json',
EXPORT_TO_XLS: 'export_to_xls',
EXPORT_TO_XLSX: 'export_to_xlsx',
DOWNLOAD_AS_IMAGE: 'download_as_image',
SHARE_SUBMENU: 'share_submenu',
COPY_PERMALINK: 'copy_permalink',
@@ -157,6 +159,30 @@ export const useExploreAdditionalActionsMenu = (
}),
[latestQueryFormData],
);
const exportXLS = useCallback(
() =>
canDownloadCSV
? exportChart({
formData: latestQueryFormData,
ownState,
resultType: 'full',
resultFormat: 'xls',
})
: null,
[canDownloadCSV, latestQueryFormData],
);
const exportXLSX = useCallback(
() =>
canDownloadCSV
? exportChart({
formData: latestQueryFormData,
ownState,
resultType: 'full',
resultFormat: 'xlsx',
})
: null,
[canDownloadCSV, latestQueryFormData],
);

const copyLink = useCallback(async () => {
try {
@@ -192,6 +218,16 @@
setIsDropdownVisible(false);
setOpenSubmenus([]);

break;
case MENU_KEYS.EXPORT_TO_XLS:
exportXLS();
setIsDropdownVisible(false);
setOpenSubmenus([]);
break;
case MENU_KEYS.EXPORT_TO_XLSX:
exportXLSX();
setIsDropdownVisible(false);
setOpenSubmenus([]);
break;
case MENU_KEYS.DOWNLOAD_AS_IMAGE:
downloadAsImage(
@@ -296,6 +332,12 @@
<Menu.Item key={MENU_KEYS.EXPORT_TO_JSON} icon={<FileOutlined />}>
{t('Export to .JSON')}
</Menu.Item>
<Menu.Item key={MENU_KEYS.EXPORT_TO_XLS} icon={<FileOutlined />}>
{t('Export to .XLS')}
</Menu.Item>
<Menu.Item key={MENU_KEYS.EXPORT_TO_XLSX} icon={<FileOutlined />}>
{t('Export to .XLSX')}
</Menu.Item>
<Menu.Item
key={MENU_KEYS.DOWNLOAD_AS_IMAGE}
icon={<FileImageOutlined />}
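The two new menu entries reuse `exportChart` with `resultFormat: 'xls'`/`'xlsx'`, so they end up hitting the same chart-data endpoint as the CSV export. A rough sketch of the equivalent request made outside the UI — the endpoint path, payload shape, and auth header are assumptions based on the API tests further down, not something this diff defines:

```python
# Hypothetical sketch of the request behind "Export to .XLSX"; values are placeholders.
import requests

payload = {
    "datasource": {"id": 1, "type": "table"},       # assumed minimal datasource reference
    "queries": [{"columns": ["name"], "metrics": ["count"], "row_limit": 100}],
    "result_type": "full",
    "result_format": "xlsx",
}

resp = requests.post(
    "https://superset.example.com/api/v1/chart/data",
    json=payload,
    headers={"Authorization": "Bearer <token>"},     # placeholder credentials
    timeout=30,
)
with open("chart.xlsx", "wb") as fh:
    fh.write(resp.content)  # binary workbook when a single query is requested
```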
32 changes: 24 additions & 8 deletions superset/charts/data/api.py
@@ -45,7 +45,12 @@
from superset.extensions import event_logger
from superset.utils.async_query_manager import AsyncQueryTokenException
from superset.utils.core import create_zip, get_user_id, json_int_dttm_ser
from superset.views.base import CsvResponse, generate_download_headers
from superset.views.base import (
CsvResponse,
generate_download_headers,
XlsResponse,
XlsxResponse,
)
from superset.views.base_api import statsd_metrics

if TYPE_CHECKING:
@@ -342,23 +347,34 @@ def _send_chart_response(
if result_type == ChartDataResultType.POST_PROCESSED:
result = apply_post_process(result, form_data, datasource)

if result_format == ChartDataResultFormat.CSV:
# Verify user has permission to export CSV file
if result_format in ChartDataResultFormat.table_like():
# Verify user has permission to export file
if not security_manager.can_access("can_csv", "Superset"):
return self.response_403()

if not result["queries"]:
return self.response_400(_("Empty query result"))

if len(result["queries"]) == 1:
# return single query results csv format
# return single query results
data = result["queries"][0]["data"]
return CsvResponse(data, headers=generate_download_headers("csv"))
if result_format == ChartDataResultFormat.CSV:
return CsvResponse(data, headers=generate_download_headers("csv"))
elif result_format == ChartDataResultFormat.XLS:
return XlsResponse(data, headers=generate_download_headers("xls"))
elif result_format == ChartDataResultFormat.XLSX:
return XlsxResponse(data, headers=generate_download_headers("xlsx"))

# return multi-query results bundled as a zip file

def _process_data(d: Any) -> Any:
if result_format == ChartDataResultFormat.CSV:
encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8")
return d.encode(encoding)
return d

# return multi-query csv results bundled as a zip file
encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8")
files = {
f"query_{idx + 1}.csv": result["data"].encode(encoding)
f"query_{idx + 1}.{result_format}": _process_data(result["data"])
for idx, result in enumerate(result["queries"])
}
return Response(
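For a single query, `_send_chart_response` now picks the response class by format; for multiple queries it still bundles a zip, and `_process_data` only encodes CSV text because the Excel payloads already arrive as bytes. A minimal stand-alone sketch of that bundling logic (plain helper names here, not the Flask response objects the real code returns):

```python
# Stand-alone sketch of the multi-query bundling path above.
from typing import Any, Dict, List


def process_data(data: Any, result_format: str, encoding: str = "utf-8") -> Any:
    # Mirrors _process_data: only CSV strings need encoding; xls/xlsx data is already bytes.
    if result_format == "csv" and isinstance(data, str):
        return data.encode(encoding)
    return data


def bundle_queries(results: List[Dict[str, Any]], result_format: str) -> Dict[str, Any]:
    # Mirrors the zip entry naming: query_1.<ext>, query_2.<ext>, ...
    return {
        f"query_{idx + 1}.{result_format}": process_data(r["data"], result_format)
        for idx, r in enumerate(results)
    }


print(bundle_queries([{"data": "a,b\n1,2\n"}, {"data": "c,d\n3,4\n"}], "csv"))
```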
11 changes: 11 additions & 0 deletions superset/common/chart_data.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from enum import Enum
from typing import Set


class ChartDataResultFormat(str, Enum):
@@ -24,6 +25,16 @@ class ChartDataResultFormat(str, Enum):

CSV = "csv"
JSON = "json"
XLS = "xls"
XLSX = "xlsx"

@classmethod
def excel(cls) -> Set["ChartDataResultFormat"]:
return {cls.XLS, cls.XLSX}

@classmethod
def table_like(cls) -> Set["ChartDataResultFormat"]:
return {cls.CSV} | {cls.XLS, cls.XLSX}


class ChartDataResultType(str, Enum):
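The two classmethods let the permission and dispatch checks above read as simple membership tests. A small usage sketch, using only the import path shown in this diff:

```python
# Usage sketch for the new ChartDataResultFormat helpers.
from superset.common.chart_data import ChartDataResultFormat

fmt = ChartDataResultFormat("xlsx")  # str-based enum, so the raw value works

print(fmt in ChartDataResultFormat.excel())       # True  -> needs an Excel writer engine
print(fmt in ChartDataResultFormat.table_like())  # True  -> gated by the can_csv permission
print(ChartDataResultFormat.JSON in ChartDataResultFormat.table_like())  # False
```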
17 changes: 12 additions & 5 deletions superset/common/query_context_processor.py
@@ -45,7 +45,7 @@
from superset.extensions import cache_manager, security_manager
from superset.models.helpers import QueryResult
from superset.models.sql_lab import Query
from superset.utils import csv
from superset.utils import csv, excel
from superset.utils.cache import generate_cache_key, set_and_log_cache
from superset.utils.core import (
DatasourceType,
@@ -425,15 +425,22 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements
return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys)

def get_data(self, df: pd.DataFrame) -> Union[str, List[Dict[str, Any]]]:
if self._query_context.result_format == ChartDataResultFormat.CSV:
if self._query_context.result_format in ChartDataResultFormat.table_like():
include_index = not isinstance(df.index, pd.RangeIndex)
columns = list(df.columns)
verbose_map = self._qc_datasource.data.get("verbose_map", {})
if verbose_map:
df.columns = [verbose_map.get(column, column) for column in columns]
result = csv.df_to_escaped_csv(
df, index=include_index, **config["CSV_EXPORT"]
)
if self._query_context.result_format == ChartDataResultFormat.CSV:
result = csv.df_to_escaped_csv(
df, index=include_index, **config["CSV_EXPORT"]
)
else:
result = excel.df_to_excel(
df,
excel_format=self._query_context.result_format,
**config["EXCEL_EXPORT"],
)
return result or ""

return df.to_dict(orient="records")
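`get_data` now routes every table-like format through the same column-relabelling path and only branches on the writer at the end. An illustrative stand-alone sketch of that flow with a toy DataFrame — the local `df_to_excel` mirrors the helper added in `superset/utils/excel.py`, it is not imported from Superset here:

```python
# Illustrative sketch of get_data()'s table-like branch.
import io

import pandas as pd


def df_to_excel(df: pd.DataFrame, **kwargs) -> bytes:
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
        df.to_excel(writer, **kwargs)
    return output.getvalue()


df = pd.DataFrame({"sum__num": [10, 20], "gender": ["boy", "girl"]})
verbose_map = {"sum__num": "Births", "gender": "Gender"}

df.columns = [verbose_map.get(c, c) for c in df.columns]     # relabel for export
include_index = not isinstance(df.index, pd.RangeIndex)      # False for a default index

result_format = "xlsx"
if result_format == "csv":
    result = df.to_csv(index=include_index)
else:
    result = df_to_excel(df, index=include_index)

print(type(result), len(result))
```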
5 changes: 5 additions & 0 deletions superset/config.py
@@ -662,6 +662,11 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:
# note: index option should not be overridden
CSV_EXPORT = {"encoding": "utf-8"}

# Excel Options: key/value pairs that will be passed as argument to DataFrame.to_excel
# method.
# note: index option should not be overridden
EXCEL_EXPORT = {"encoding": "utf-8"}

# ---------------------------------------------------
# Time grain configurations
# ---------------------------------------------------
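Because the keys of `EXCEL_EXPORT` are forwarded straight to `DataFrame.to_excel`, a deployment can tune the workbook output from `superset_config.py`. A sketch of such an override — the specific keys chosen here are illustrative `to_excel` keyword arguments, not defaults from this PR:

```python
# superset_config.py — sketch of overriding the new option.
# Only DataFrame.to_excel keyword arguments are valid; `index` should not be set here,
# since the exporter decides per DataFrame whether the index is meaningful.
EXCEL_EXPORT = {
    "na_rep": "",            # how NaN cells are written
    "float_format": "%.2f",  # two decimals for floats
    "sheet_name": "data",    # worksheet name in the generated workbook
}
```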
18 changes: 18 additions & 0 deletions superset/utils/excel.py
@@ -0,0 +1,18 @@
import io
from typing import Any

import pandas as pd

from superset.common.chart_data import ChartDataResultFormat


def df_to_excel(
df: pd.DataFrame,
excel_format: ChartDataResultFormat = ChartDataResultFormat.XLSX,
**kwargs: Any
) -> bytes:
output = io.BytesIO()
engine = "xlwt" if excel_format == ChartDataResultFormat.XLS else "xlsxwriter"
with pd.ExcelWriter(output, engine=engine) as writer:
df.to_excel(writer, **kwargs)
return output.getvalue()
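`df_to_excel` buffers the workbook in memory and picks the writer engine from the requested format, passing any extra keyword arguments on to `to_excel`. A quick usage sketch, assuming the `xlsx` extra (xlsxwriter) is installed:

```python
# Usage sketch for superset.utils.excel.df_to_excel.
import pandas as pd

from superset.common.chart_data import ChartDataResultFormat
from superset.utils.excel import df_to_excel

df = pd.DataFrame({"name": ["Aaron", "Amy"], "num": [1_120, 980]})

workbook = df_to_excel(df, excel_format=ChartDataResultFormat.XLSX, index=False)
with open("births.xlsx", "wb") as fh:
    fh.write(workbook)
```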
20 changes: 20 additions & 0 deletions superset/views/base.py
@@ -664,6 +664,26 @@ class CsvResponse(Response):
default_mimetype = "text/csv"


class XlsResponse(Response):
"""
Override Response to use xls mimetype
"""

charset = "utf-8"
default_mimetype = "application/vnd.ms-excel"


class XlsxResponse(Response):
"""
Override Response to use xlsx mimetype
"""

charset = "utf-8"
default_mimetype = (
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)


def bind_field(
_: Any, form: DynamicForm, unbound_field: UnboundField, options: Dict[Any, Any]
) -> Field:
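Both response classes only change the mimetype; the download filename still comes from `generate_download_headers`. A rough sketch of how a view would return one — the header dict shown is an assumption about what that helper produces, and the route is purely illustrative:

```python
# Hypothetical Flask view returning a workbook with the new response class.
from flask import Flask

from superset.views.base import XlsxResponse  # defined in this diff

app = Flask(__name__)


@app.route("/demo/export.xlsx")
def export_xlsx() -> XlsxResponse:
    data = b"..."  # bytes produced by superset.utils.excel.df_to_excel
    headers = {"Content-Disposition": "attachment; filename=demo.xlsx"}  # assumed shape
    return XlsxResponse(data, headers=headers)
```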
14 changes: 14 additions & 0 deletions superset/views/core.py
@@ -152,6 +152,8 @@
json_errors_response,
json_success,
validate_sqlatable,
XlsResponse,
XlsxResponse,
)
from superset.views.sql_lab.schemas import SqlJsonPayloadSchema
from superset.views.utils import (
@@ -483,6 +485,18 @@ def generate_json(
viz_obj.get_csv(), headers=generate_download_headers("csv")
)

if response_type == ChartDataResultFormat.XLS:
return XlsResponse(
viz_obj.get_excel(ChartDataResultFormat(response_type)),
headers=generate_download_headers("xls"),
)

if response_type == ChartDataResultFormat.XLSX:
return XlsxResponse(
viz_obj.get_excel(ChartDataResultFormat(response_type)),
headers=generate_download_headers("xlsx"),
)

if response_type == ChartDataResultType.QUERY:
return self.get_query_string_response(viz_obj)

10 changes: 9 additions & 1 deletion superset/viz.py
@@ -56,6 +56,7 @@
from pandas.tseries.frequencies import to_offset

from superset import app
from superset.common.chart_data import ChartDataResultFormat
from superset.common.db_query_status import QueryStatus
from superset.constants import NULL_STRING
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
@@ -76,7 +77,7 @@
VizData,
VizPayload,
)
from superset.utils import core as utils, csv
from superset.utils import core as utils, csv, excel
from superset.utils.cache import set_and_log_cache
from superset.utils.core import (
apply_max_row_limit,
@@ -670,6 +671,13 @@ def get_csv(self) -> Optional[str]:
include_index = not isinstance(df.index, pd.RangeIndex)
return csv.df_to_escaped_csv(df, index=include_index, **config["CSV_EXPORT"])

def get_excel(self, excel_format: ChartDataResultFormat) -> Optional[bytes]:
df = self.get_df_payload()["df"]
include_index = not isinstance(df.index, pd.RangeIndex)
return excel.df_to_excel(
df, index=include_index, excel_format=excel_format, **config["EXCEL_EXPORT"]
)

def get_data(self, df: pd.DataFrame) -> VizData: # pylint: disable=no-self-use
return df.to_dict(orient="records")

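`get_excel` mirrors `get_csv`, including the rule that a plain `RangeIndex` is dropped from the export while a meaningful index (e.g. after a pivot) is kept. The heuristic in isolation:

```python
# The include_index heuristic shared by get_csv() and get_excel().
import pandas as pd

flat = pd.DataFrame({"state": ["CA", "NY"], "num": [3, 5]})
pivoted = flat.set_index("state")

print(not isinstance(flat.index, pd.RangeIndex))     # False -> index omitted from export
print(not isinstance(pivoted.index, pd.RangeIndex))  # True  -> index written as a column
```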
47 changes: 33 additions & 14 deletions tests/integration_tests/charts/data/api_tests.py
@@ -246,48 +246,67 @@ def test_with_query_result_type__200(self):
assert rv.status_code == 200

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_empty_request_with_csv_result_format(self):
@pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"])
def test_empty_request_with_table_like_result_format(self, result_format):
"""
Chart data API: Test empty chart data with CSV result format
Chart data API: Test empty chart data with table like result format
"""
self.query_context_payload["result_format"] = "csv"
self.query_context_payload["result_format"] = result_format
self.query_context_payload["queries"] = []
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 400

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_csv_result_format(self):
@pytest.mark.parametrize(
"result_format,mimetype",
[
("csv", "text/csv"),
("xls", "application/vnd.ms-excel"),
(
"xlsx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
]
)
def test_with_table_like_result_format(self, result_format, mimetype):
"""
Chart data API: Test chart data with CSV result format
Chart data API: Test chart data with table like result format
"""
self.query_context_payload["result_format"] = "csv"
self.query_context_payload["result_format"] = result_format
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 200
assert rv.mimetype == "text/csv"
assert rv.mimetype == mimetype

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_multi_query_csv_result_format(self):
@pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"])
def test_with_multi_query_table_like_result_format(self, result_format):
"""
Chart data API: Test chart data with multi-query CSV result format
Chart data API: Test chart data with multi-query table like result format
"""
self.query_context_payload["result_format"] = "csv"
self.query_context_payload["result_format"] = result_format
self.query_context_payload["queries"].append(
self.query_context_payload["queries"][0]
)
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 200
assert rv.mimetype == "application/zip"
zipfile = ZipFile(BytesIO(rv.data), "r")
assert zipfile.namelist() == ["query_1.csv", "query_2.csv"]
assert zipfile.namelist() == [
f"query_1.{result_format}",
f"query_2.{result_format}",
]

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_csv_result_format_when_actor_not_permitted_for_csv__403(self):
@pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"])
def test_with_table_like_result_format_when_actor_not_permitted_for__403(
self, result_format
):
"""
Chart data API: Test chart data with CSV result format
Chart data API: Test chart data with table like result format
"""
self.logout()
self.login(username="gamma_no_csv")
self.query_context_payload["result_format"] = "csv"
self.query_context_payload["result_format"] = result_format

rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 403