From 7c3773f9ea41da24856e301a2f9315dca4f51dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20=C8=98incariov?= Date: Wed, 2 Nov 2022 15:15:53 +0300 Subject: [PATCH 1/6] feat: Adds xls and xlsx data export format --- setup.py | 2 + .../useExploreAdditionalActionsMenu/index.jsx | 42 +++++++++++++++++ superset/charts/data/api.py | 36 ++++++++++---- superset/common/chart_data.py | 20 ++++++++ superset/common/query_context_processor.py | 17 +++++-- superset/config.py | 5 ++ superset/utils/excel.py | 18 +++++++ superset/views/base.py | 20 ++++++++ superset/views/core.py | 14 ++++++ superset/viz.py | 10 +++- .../charts/data/api_tests.py | 47 +++++++++++++------ 11 files changed, 202 insertions(+), 29 deletions(-) create mode 100644 superset/utils/excel.py diff --git a/setup.py b/setup.py index cad76a9572997..abc6ceb1b1bde 100644 --- a/setup.py +++ b/setup.py @@ -173,6 +173,8 @@ def get_git_sha() -> str: "thumbnails": ["Pillow>=9.1.1, <10.0.0"], "vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"], "netezza": ["nzalchemy>=11.0.2"], + "xls": ["xlwt>=1.3.0, < 1.4"], + "xlsx": ["xlsxwriter>=3.0.0, < 3.1"], }, python_requires="~=3.8", author="Apache Software Foundation", diff --git a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx index fa794e871ce57..f75f102459369 100644 --- a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx +++ b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx @@ -41,6 +41,8 @@ const MENU_KEYS = { EXPORT_TO_CSV: 'export_to_csv', EXPORT_TO_CSV_PIVOTED: 'export_to_csv_pivoted', EXPORT_TO_JSON: 'export_to_json', + EXPORT_TO_XLS: 'export_to_xls', + EXPORT_TO_XLSX: 'export_to_xlsx', DOWNLOAD_AS_IMAGE: 'download_as_image', SHARE_SUBMENU: 'share_submenu', COPY_PERMALINK: 'copy_permalink', @@ -157,6 +159,30 @@ export const useExploreAdditionalActionsMenu = ( }), [latestQueryFormData], ); + const exportXLS = useCallback( + () => + canDownloadCSV + ? exportChart({ + formData: latestQueryFormData, + ownState, + resultType: 'full', + resultFormat: 'xls', + }) + : null, + [canDownloadCSV, latestQueryFormData], + ); + const exportXLSX = useCallback( + () => + canDownloadCSV + ? 
exportChart({ + formData: latestQueryFormData, + ownState, + resultType: 'full', + resultFormat: 'xlsx', + }) + : null, + [canDownloadCSV, latestQueryFormData], + ); const copyLink = useCallback(async () => { try { @@ -192,6 +218,16 @@ export const useExploreAdditionalActionsMenu = ( setIsDropdownVisible(false); setOpenSubmenus([]); + break; + case MENU_KEYS.EXPORT_TO_XLS: + exportXLS(); + setIsDropdownVisible(false); + setOpenSubmenus([]); + break; + case MENU_KEYS.EXPORT_TO_XLSX: + exportXLSX(); + setIsDropdownVisible(false); + setOpenSubmenus([]); break; case MENU_KEYS.DOWNLOAD_AS_IMAGE: downloadAsImage( @@ -296,6 +332,12 @@ export const useExploreAdditionalActionsMenu = ( }> {t('Export to .JSON')} + }> + {t('Export to .XLS')} + + }> + {t('Export to .XLSX')} + } diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index c20fdde6fd94e..0ec3b0b41856b 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -39,13 +39,22 @@ from superset.charts.data.query_context_cache_loader import QueryContextCacheLoader from superset.charts.post_processing import apply_post_process from superset.charts.schemas import ChartDataQueryContextSchema -from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType +from superset.common.chart_data import ( + CHART_DATA_RESULT_FORMAT_TO_RESPONSE, + ChartDataResultFormat, + ChartDataResultType, +) from superset.connectors.base.models import BaseDatasource from superset.exceptions import QueryObjectValidationError from superset.extensions import event_logger from superset.utils.async_query_manager import AsyncQueryTokenException from superset.utils.core import create_zip, get_user_id, json_int_dttm_ser -from superset.views.base import CsvResponse, generate_download_headers +from superset.views.base import ( + CsvResponse, + generate_download_headers, + XlsResponse, + XlsxResponse, +) from superset.views.base_api import statsd_metrics if TYPE_CHECKING: @@ -342,8 +351,8 @@ def _send_chart_response( if result_type == ChartDataResultType.POST_PROCESSED: result = apply_post_process(result, form_data, datasource) - if result_format == ChartDataResultFormat.CSV: - # Verify user has permission to export CSV file + if result_format in ChartDataResultFormat.table_like(): + # Verify user has permission to export file if not security_manager.can_access("can_csv", "Superset"): return self.response_403() @@ -351,14 +360,23 @@ def _send_chart_response( return self.response_400(_("Empty query result")) if len(result["queries"]) == 1: - # return single query results csv format + # return single query results data = result["queries"][0]["data"] - return CsvResponse(data, headers=generate_download_headers("csv")) + response_class = CHART_DATA_RESULT_FORMAT_TO_RESPONSE[result_format] + return response_class( + data, headers=generate_download_headers(result_format) + ) + + # return multi-query results bundled as a zip file + + def _process_data(d: Any) -> Any: + if result_format == ChartDataResultFormat.CSV: + encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8") + return d.encode(encoding) + return d - # return multi-query csv results bundled as a zip file - encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8") files = { - f"query_{idx + 1}.csv": result["data"].encode(encoding) + f"query_{idx + 1}.{result_format}": _process_data(result["data"]) for idx, result in enumerate(result["queries"]) } return Response( diff --git a/superset/common/chart_data.py b/superset/common/chart_data.py index 
ea31d4f13817f..680f0905cf285 100644 --- a/superset/common/chart_data.py +++ b/superset/common/chart_data.py @@ -15,6 +15,9 @@ # specific language governing permissions and limitations # under the License. from enum import Enum +from typing import Final, Set + +from superset.views.base import CsvResponse, XlsResponse, XlsxResponse class ChartDataResultFormat(str, Enum): @@ -24,6 +27,16 @@ class ChartDataResultFormat(str, Enum): CSV = "csv" JSON = "json" + XLS = "xls" + XLSX = "xlsx" + + @classmethod + def excel(cls) -> Set["ChartDataResultFormat"]: + return {cls.XLS, cls.XLSX} + + @classmethod + def table_like(cls) -> Set["ChartDataResultFormat"]: + return {cls.CSV} | {cls.XLS, cls.XLSX} class ChartDataResultType(str, Enum): @@ -39,3 +52,10 @@ class ChartDataResultType(str, Enum): TIMEGRAINS = "timegrains" POST_PROCESSED = "post_processed" DRILL_DETAIL = "drill_detail" + + +CHART_DATA_RESULT_FORMAT_TO_RESPONSE: Final = { + ChartDataResultFormat.CSV: CsvResponse, + ChartDataResultFormat.XLS: XlsResponse, + ChartDataResultFormat.XLSX: XlsxResponse, +} diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 01259ede1d8a5..3801d5410e22d 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -45,7 +45,7 @@ from superset.extensions import cache_manager, security_manager from superset.models.helpers import QueryResult from superset.models.sql_lab import Query -from superset.utils import csv +from superset.utils import csv, excel from superset.utils.cache import generate_cache_key, set_and_log_cache from superset.utils.core import ( DatasourceType, @@ -425,15 +425,22 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys) def get_data(self, df: pd.DataFrame) -> Union[str, List[Dict[str, Any]]]: - if self._query_context.result_format == ChartDataResultFormat.CSV: + if self._query_context.result_format in ChartDataResultFormat.table_like(): include_index = not isinstance(df.index, pd.RangeIndex) columns = list(df.columns) verbose_map = self._qc_datasource.data.get("verbose_map", {}) if verbose_map: df.columns = [verbose_map.get(column, column) for column in columns] - result = csv.df_to_escaped_csv( - df, index=include_index, **config["CSV_EXPORT"] - ) + if self._query_context.result_type == ChartDataResultFormat.CSV: + result = csv.df_to_escaped_csv( + df, index=include_index, **config["CSV_EXPORT"] + ) + else: + result = excel.df_to_excel( + df, + excel_format=self._query_context.result_format, + **config["EXCEL_EXPORT"], + ) return result or "" return df.to_dict(orient="records") diff --git a/superset/config.py b/superset/config.py index 3412178223988..29143c933812a 100644 --- a/superset/config.py +++ b/superset/config.py @@ -662,6 +662,11 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: # note: index option should not be overridden CSV_EXPORT = {"encoding": "utf-8"} +# Excel Options: key/value pairs that will be passed as argument to DataFrame.to_excel +# method. 
+# note: index option should not be overridden +EXCEL_EXPORT = {"encoding": "utf-8"} + # --------------------------------------------------- # Time grain configurations # --------------------------------------------------- diff --git a/superset/utils/excel.py b/superset/utils/excel.py new file mode 100644 index 0000000000000..595e3276f0c77 --- /dev/null +++ b/superset/utils/excel.py @@ -0,0 +1,18 @@ +import io +from typing import Any + +import pandas as pd + +from superset.common.chart_data import ChartDataResultFormat + + +def df_to_excel( + df: pd.DataFrame, + excel_format: ChartDataResultFormat = ChartDataResultFormat.XLSX, + **kwargs: Any +) -> bytes: + output = io.BytesIO() + engine = "xlwt" if excel_format == ChartDataResultFormat.XLS else "xlsxwriter" + with pd.ExcelWriter(output, engine=engine) as writer: + df.to_excel(writer, **kwargs) + return output.getvalue() diff --git a/superset/views/base.py b/superset/views/base.py index 0f9bfc9e4063b..a138d6436e480 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -664,6 +664,26 @@ class CsvResponse(Response): default_mimetype = "text/csv" +class XlsResponse(Response): + """ + Override Response to use xls mimetype + """ + + charset = "utf-8" + default_mimetype = "application/vnd.ms-excel" + + +class XlsxResponse(Response): + """ + Override Response to use xlsx mimetype + """ + + charset = "utf-8" + default_mimetype = ( + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + + def bind_field( _: Any, form: DynamicForm, unbound_field: UnboundField, options: Dict[Any, Any] ) -> Field: diff --git a/superset/views/core.py b/superset/views/core.py index 371a632f29535..a29ae08a77a57 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -152,6 +152,8 @@ json_errors_response, json_success, validate_sqlatable, + XlsResponse, + XlsxResponse, ) from superset.views.sql_lab.schemas import SqlJsonPayloadSchema from superset.views.utils import ( @@ -483,6 +485,18 @@ def generate_json( viz_obj.get_csv(), headers=generate_download_headers("csv") ) + if response_type == ChartDataResultFormat.XLS: + return XlsResponse( + viz_obj.get_excel(ChartDataResultFormat(response_type)), + headers=generate_download_headers("xls"), + ) + + if response_type == ChartDataResultFormat.XLSX: + return XlsxResponse( + viz_obj.get_excel(ChartDataResultFormat(response_type)), + headers=generate_download_headers("xlsx"), + ) + if response_type == ChartDataResultType.QUERY: return self.get_query_string_response(viz_obj) diff --git a/superset/viz.py b/superset/viz.py index 43e71b533c61c..0f7f5ea3e8fad 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -56,6 +56,7 @@ from pandas.tseries.frequencies import to_offset from superset import app +from superset.common.chart_data import ChartDataResultFormat from superset.common.db_query_status import QueryStatus from superset.constants import NULL_STRING from superset.errors import ErrorLevel, SupersetError, SupersetErrorType @@ -76,7 +77,7 @@ VizData, VizPayload, ) -from superset.utils import core as utils, csv +from superset.utils import core as utils, csv, excel from superset.utils.cache import set_and_log_cache from superset.utils.core import ( apply_max_row_limit, @@ -670,6 +671,13 @@ def get_csv(self) -> Optional[str]: include_index = not isinstance(df.index, pd.RangeIndex) return csv.df_to_escaped_csv(df, index=include_index, **config["CSV_EXPORT"]) + def get_excel(self, excel_format: ChartDataResultFormat) -> Optional[bytes]: + df = self.get_df_payload()["df"] + include_index = 
not isinstance(df.index, pd.RangeIndex) + return excel.df_to_excel( + df, index=include_index, excel_format=excel_format, **config["EXCEL_EXPORT"] + ) + def get_data(self, df: pd.DataFrame) -> VizData: # pylint: disable=no-self-use return df.to_dict(orient="records") diff --git a/tests/integration_tests/charts/data/api_tests.py b/tests/integration_tests/charts/data/api_tests.py index 8b2fd993886ce..e9b4f4d1c617d 100644 --- a/tests/integration_tests/charts/data/api_tests.py +++ b/tests/integration_tests/charts/data/api_tests.py @@ -246,31 +246,44 @@ def test_with_query_result_type__200(self): assert rv.status_code == 200 @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - def test_empty_request_with_csv_result_format(self): + @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) + def test_empty_request_with_table_like_result_format(self, result_format): """ - Chart data API: Test empty chart data with CSV result format + Chart data API: Test empty chart data with table like result format """ - self.query_context_payload["result_format"] = "csv" + self.query_context_payload["result_format"] = result_format self.query_context_payload["queries"] = [] rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 400 @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - def test_with_csv_result_format(self): + @pytest.mark.parametrize( + "result_format,mimetype", + [ + ("csv", "text/csv"), + ("xls", "application/vnd.ms-excel"), + ( + "xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ) + ] + ) + def test_with_table_like_result_format(self, result_format, mimetype): """ - Chart data API: Test chart data with CSV result format + Chart data API: Test chart data with table like result format """ - self.query_context_payload["result_format"] = "csv" + self.query_context_payload["result_format"] = result_format rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 200 - assert rv.mimetype == "text/csv" + assert rv.mimetype == mimetype @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - def test_with_multi_query_csv_result_format(self): + @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) + def test_with_multi_query_table_like_result_format(self, result_format): """ - Chart data API: Test chart data with multi-query CSV result format + Chart data API: Test chart data with multi-query table like result format """ - self.query_context_payload["result_format"] = "csv" + self.query_context_payload["result_format"] = result_format self.query_context_payload["queries"].append( self.query_context_payload["queries"][0] ) @@ -278,16 +291,22 @@ def test_with_multi_query_csv_result_format(self): assert rv.status_code == 200 assert rv.mimetype == "application/zip" zipfile = ZipFile(BytesIO(rv.data), "r") - assert zipfile.namelist() == ["query_1.csv", "query_2.csv"] + assert zipfile.namelist() == [ + f"query_1.{result_format}", + f"query_2.{result_format}", + ] @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - def test_with_csv_result_format_when_actor_not_permitted_for_csv__403(self): + @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) + def test_with_table_like_result_format_when_actor_not_permitted_for__403( + self, result_format + ): """ - Chart data API: Test chart data with CSV result format + Chart data API: Test chart data with table like result format """ self.logout() 
self.login(username="gamma_no_csv") - self.query_context_payload["result_format"] = "csv" + self.query_context_payload["result_format"] = result_format rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 403 From df44434beb77de1fba4da58183441b19ecfa71cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20=C8=98incariov?= Date: Wed, 2 Nov 2022 15:44:01 +0300 Subject: [PATCH 2/6] feat: Fixes circular import bug --- superset/charts/data/api.py | 16 +++++++--------- superset/common/chart_data.py | 11 +---------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index 0ec3b0b41856b..fc0f3f3a16643 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -39,11 +39,7 @@ from superset.charts.data.query_context_cache_loader import QueryContextCacheLoader from superset.charts.post_processing import apply_post_process from superset.charts.schemas import ChartDataQueryContextSchema -from superset.common.chart_data import ( - CHART_DATA_RESULT_FORMAT_TO_RESPONSE, - ChartDataResultFormat, - ChartDataResultType, -) +from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType from superset.connectors.base.models import BaseDatasource from superset.exceptions import QueryObjectValidationError from superset.extensions import event_logger @@ -362,10 +358,12 @@ def _send_chart_response( if len(result["queries"]) == 1: # return single query results data = result["queries"][0]["data"] - response_class = CHART_DATA_RESULT_FORMAT_TO_RESPONSE[result_format] - return response_class( - data, headers=generate_download_headers(result_format) - ) + if result_format == ChartDataResultFormat.CSV: + return CsvResponse(data, headers=generate_download_headers("csv")) + elif result_format == ChartDataResultFormat.XLS: + return XlsResponse(data, headers=generate_download_headers("xls")) + elif result_format == ChartDataResultFormat.XLSX: + return XlsxResponse(data, headers=generate_download_headers("xlsx")) # return multi-query results bundled as a zip file diff --git a/superset/common/chart_data.py b/superset/common/chart_data.py index 680f0905cf285..1cbe982b07c2a 100644 --- a/superset/common/chart_data.py +++ b/superset/common/chart_data.py @@ -15,9 +15,7 @@ # specific language governing permissions and limitations # under the License. 
from enum import Enum -from typing import Final, Set - -from superset.views.base import CsvResponse, XlsResponse, XlsxResponse +from typing import Set class ChartDataResultFormat(str, Enum): @@ -52,10 +50,3 @@ class ChartDataResultType(str, Enum): TIMEGRAINS = "timegrains" POST_PROCESSED = "post_processed" DRILL_DETAIL = "drill_detail" - - -CHART_DATA_RESULT_FORMAT_TO_RESPONSE: Final = { - ChartDataResultFormat.CSV: CsvResponse, - ChartDataResultFormat.XLS: XlsResponse, - ChartDataResultFormat.XLSX: XlsxResponse, -} From 9e3da1be5dd47a8241c86bacfdc70957edb20d39 Mon Sep 17 00:00:00 2001 From: EugeneTorap Date: Fri, 27 Jan 2023 12:41:56 +0300 Subject: [PATCH 3/6] Refactoring code --- requirements/base.txt | 2 + setup.py | 3 +- .../useExploreAdditionalActionsMenu/index.jsx | 47 +++------- superset/charts/data/api.py | 28 +++--- superset/common/chart_data.py | 7 +- superset/common/query_context_processor.py | 12 +-- superset/utils/excel.py | 29 ++++-- superset/views/base.py | 9 -- superset/views/core.py | 7 -- .../charts/data/api_tests.py | 93 ++++++++++++------- 10 files changed, 113 insertions(+), 124 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 30dc9c0a9187c..1e9183f479379 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -278,6 +278,8 @@ wtforms==2.3.3 # wtforms-json wtforms-json==0.3.3 # via apache-superset +xlsxwriter==3.0.3 + # via apache-superset # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/setup.py b/setup.py index 0e45eb5fdb14f..0fddbfb1b0ce2 100644 --- a/setup.py +++ b/setup.py @@ -121,6 +121,7 @@ def get_git_sha() -> str: "tabulate>=0.8.9, <0.9", "typing-extensions>=4, <5", "wtforms-json", + "xlsxwriter", ], extras_require={ "athena": ["pyathena[pandas]>=2, <3"], @@ -173,8 +174,6 @@ def get_git_sha() -> str: "thumbnails": ["Pillow>=9.1.1, <10.0.0"], "vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"], "netezza": ["nzalchemy>=11.0.2"], - "xls": ["xlwt>=1.3.0, < 1.4"], - "xlsx": ["xlsxwriter>=3.0.0, < 3.1"], }, python_requires="~=3.8", author="Apache Software Foundation", diff --git a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx index f75f102459369..f448abdeda3b7 100644 --- a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx +++ b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx @@ -41,7 +41,6 @@ const MENU_KEYS = { EXPORT_TO_CSV: 'export_to_csv', EXPORT_TO_CSV_PIVOTED: 'export_to_csv_pivoted', EXPORT_TO_JSON: 'export_to_json', - EXPORT_TO_XLS: 'export_to_xls', EXPORT_TO_XLSX: 'export_to_xlsx', DOWNLOAD_AS_IMAGE: 'download_as_image', SHARE_SUBMENU: 'share_submenu', @@ -159,29 +158,15 @@ export const useExploreAdditionalActionsMenu = ( }), [latestQueryFormData], ); - const exportXLS = useCallback( - () => - canDownloadCSV - ? exportChart({ - formData: latestQueryFormData, - ownState, - resultType: 'full', - resultFormat: 'xls', - }) - : null, - [canDownloadCSV, latestQueryFormData], - ); - const exportXLSX = useCallback( + + const exportExcel = useCallback( () => - canDownloadCSV - ? 
exportChart({ - formData: latestQueryFormData, - ownState, - resultType: 'full', - resultFormat: 'xlsx', - }) - : null, - [canDownloadCSV, latestQueryFormData], + exportChart({ + formData: latestQueryFormData, + resultType: 'results', + resultFormat: 'xlsx', + }), + [latestQueryFormData], ); const copyLink = useCallback(async () => { @@ -218,14 +203,9 @@ export const useExploreAdditionalActionsMenu = ( setIsDropdownVisible(false); setOpenSubmenus([]); - break; - case MENU_KEYS.EXPORT_TO_XLS: - exportXLS(); - setIsDropdownVisible(false); - setOpenSubmenus([]); break; case MENU_KEYS.EXPORT_TO_XLSX: - exportXLSX(); + exportExcel(); setIsDropdownVisible(false); setOpenSubmenus([]); break; @@ -332,18 +312,15 @@ export const useExploreAdditionalActionsMenu = ( }> {t('Export to .JSON')} - }> - {t('Export to .XLS')} - - }> - {t('Export to .XLSX')} - } > {t('Download as image')} + }> + {t('Export to Excel')} + diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index fc0f3f3a16643..0418472982fdf 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -45,12 +45,7 @@ from superset.extensions import event_logger from superset.utils.async_query_manager import AsyncQueryTokenException from superset.utils.core import create_zip, get_user_id, json_int_dttm_ser -from superset.views.base import ( - CsvResponse, - generate_download_headers, - XlsResponse, - XlsxResponse, -) +from superset.views.base import CsvResponse, generate_download_headers, XlsxResponse from superset.views.base_api import statsd_metrics if TYPE_CHECKING: @@ -355,27 +350,26 @@ def _send_chart_response( if not result["queries"]: return self.response_400(_("Empty query result")) + is_csv_format = result_format == ChartDataResultFormat.CSV + if len(result["queries"]) == 1: # return single query results data = result["queries"][0]["data"] - if result_format == ChartDataResultFormat.CSV: + if is_csv_format: return CsvResponse(data, headers=generate_download_headers("csv")) - elif result_format == ChartDataResultFormat.XLS: - return XlsResponse(data, headers=generate_download_headers("xls")) - elif result_format == ChartDataResultFormat.XLSX: - return XlsxResponse(data, headers=generate_download_headers("xlsx")) - # return multi-query results bundled as a zip file + return XlsxResponse(data, headers=generate_download_headers("xlsx")) - def _process_data(d: Any) -> Any: + # return multi-query results bundled as a zip file + def _process_data(query_data: Any) -> Any: if result_format == ChartDataResultFormat.CSV: encoding = current_app.config["CSV_EXPORT"].get("encoding", "utf-8") - return d.encode(encoding) - return d + return query_data.encode(encoding) + return query_data files = { - f"query_{idx + 1}.{result_format}": _process_data(result["data"]) - for idx, result in enumerate(result["queries"]) + f"query_{idx + 1}.{result_format}": _process_data(query["data"]) + for idx, query in enumerate(result["queries"]) } return Response( create_zip(files), diff --git a/superset/common/chart_data.py b/superset/common/chart_data.py index 1cbe982b07c2a..659a640159378 100644 --- a/superset/common/chart_data.py +++ b/superset/common/chart_data.py @@ -25,16 +25,11 @@ class ChartDataResultFormat(str, Enum): CSV = "csv" JSON = "json" - XLS = "xls" XLSX = "xlsx" - @classmethod - def excel(cls) -> Set["ChartDataResultFormat"]: - return {cls.XLS, cls.XLSX} - @classmethod def table_like(cls) -> Set["ChartDataResultFormat"]: - return {cls.CSV} | {cls.XLS, cls.XLSX} + return {cls.CSV} | {cls.XLSX} class 
ChartDataResultType(str, Enum): diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index f0fba43e02dc8..747b1aebcd3f1 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -452,16 +452,14 @@ def get_data(self, df: pd.DataFrame) -> Union[str, List[Dict[str, Any]]]: verbose_map = self._qc_datasource.data.get("verbose_map", {}) if verbose_map: df.columns = [verbose_map.get(column, column) for column in columns] - if self._query_context.result_type == ChartDataResultFormat.CSV: + + result = None + if self._query_context.result_format == ChartDataResultFormat.CSV: result = csv.df_to_escaped_csv( df, index=include_index, **config["CSV_EXPORT"] ) - else: - result = excel.df_to_excel( - df, - excel_format=self._query_context.result_format, - **config["EXCEL_EXPORT"], - ) + elif self._query_context.result_format == ChartDataResultFormat.XLSX: + result = excel.df_to_excel(df, **config["EXCEL_EXPORT"]) return result or "" return df.to_dict(orient="records") diff --git a/superset/utils/excel.py b/superset/utils/excel.py index 595e3276f0c77..1f68031b6497b 100644 --- a/superset/utils/excel.py +++ b/superset/utils/excel.py @@ -1,18 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import io from typing import Any import pandas as pd -from superset.common.chart_data import ChartDataResultFormat - -def df_to_excel( - df: pd.DataFrame, - excel_format: ChartDataResultFormat = ChartDataResultFormat.XLSX, - **kwargs: Any -) -> bytes: +def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any: output = io.BytesIO() - engine = "xlwt" if excel_format == ChartDataResultFormat.XLS else "xlsxwriter" - with pd.ExcelWriter(output, engine=engine) as writer: + # pylint: disable=abstract-class-instantiated + with pd.ExcelWriter(output, engine="xlsxwriter") as writer: df.to_excel(writer, **kwargs) + return output.getvalue() diff --git a/superset/views/base.py b/superset/views/base.py index eb01c50c0283a..8eb7e58693938 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -666,15 +666,6 @@ class CsvResponse(Response): default_mimetype = "text/csv" -class XlsResponse(Response): - """ - Override Response to use xls mimetype - """ - - charset = "utf-8" - default_mimetype = "application/vnd.ms-excel" - - class XlsxResponse(Response): """ Override Response to use xlsx mimetype diff --git a/superset/views/core.py b/superset/views/core.py index cfa5ab2218503..dfe57c7e50f24 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -152,7 +152,6 @@ json_errors_response, json_success, validate_sqlatable, - XlsResponse, XlsxResponse, ) from superset.views.sql_lab.schemas import SqlJsonPayloadSchema @@ -497,12 +496,6 @@ def generate_json( viz_obj.get_csv(), headers=generate_download_headers("csv") ) - if response_type == ChartDataResultFormat.XLS: - return XlsResponse( - viz_obj.get_excel(ChartDataResultFormat(response_type)), - headers=generate_download_headers("xls"), - ) - if response_type == ChartDataResultFormat.XLSX: return XlsxResponse( viz_obj.get_excel(ChartDataResultFormat(response_type)), diff --git a/tests/integration_tests/charts/data/api_tests.py b/tests/integration_tests/charts/data/api_tests.py index e9b4f4d1c617d..48ce6cc91d2ab 100644 --- a/tests/integration_tests/charts/data/api_tests.py +++ b/tests/integration_tests/charts/data/api_tests.py @@ -246,44 +246,52 @@ def test_with_query_result_type__200(self): assert rv.status_code == 200 @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) - def test_empty_request_with_table_like_result_format(self, result_format): + def test_empty_request_with_csv_result_format(self): """ - Chart data API: Test empty chart data with table like result format + Chart data API: Test empty chart data with CSV result format """ - self.query_context_payload["result_format"] = result_format + self.query_context_payload["result_format"] = "csv" self.query_context_payload["queries"] = [] rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 400 @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - @pytest.mark.parametrize( - "result_format,mimetype", - [ - ("csv", "text/csv"), - ("xls", "application/vnd.ms-excel"), - ( - "xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ) - ] - ) - def test_with_table_like_result_format(self, result_format, mimetype): + def test_empty_request_with_excel_result_format(self): + """ + Chart data API: Test empty chart data with Excel result format + """ + self.query_context_payload["result_format"] = "xlsx" + self.query_context_payload["queries"] = [] + rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, 
"data") + assert rv.status_code == 400 + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_with_csv_result_format(self): """ - Chart data API: Test chart data with table like result format + Chart data API: Test chart data with CSV result format """ - self.query_context_payload["result_format"] = result_format + self.query_context_payload["result_format"] = "csv" + rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") + assert rv.status_code == 200 + assert rv.mimetype == "text/csv" + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_with_excel_result_format(self): + """ + Chart data API: Test chart data with Excel result format + """ + self.query_context_payload["result_format"] = "xlsx" + mimetype = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 200 assert rv.mimetype == mimetype @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) - def test_with_multi_query_table_like_result_format(self, result_format): + def test_with_multi_query_csv_result_format(self): """ - Chart data API: Test chart data with multi-query table like result format + Chart data API: Test chart data with multi-query CSV result format """ - self.query_context_payload["result_format"] = result_format + self.query_context_payload["result_format"] = "csv" self.query_context_payload["queries"].append( self.query_context_payload["queries"][0] ) @@ -291,22 +299,43 @@ def test_with_multi_query_table_like_result_format(self, result_format): assert rv.status_code == 200 assert rv.mimetype == "application/zip" zipfile = ZipFile(BytesIO(rv.data), "r") - assert zipfile.namelist() == [ - f"query_1.{result_format}", - f"query_2.{result_format}", - ] + assert zipfile.namelist() == ["query_1.csv", "query_2.csv"] @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - @pytest.mark.parametrize("result_format", ["csv", "xls", "xlsx"]) - def test_with_table_like_result_format_when_actor_not_permitted_for__403( - self, result_format - ): + def test_with_multi_query_excel_result_format(self): + """ + Chart data API: Test chart data with multi-query Excel result format + """ + self.query_context_payload["result_format"] = "xlsx" + self.query_context_payload["queries"].append( + self.query_context_payload["queries"][0] + ) + rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") + assert rv.status_code == 200 + assert rv.mimetype == "application/zip" + zipfile = ZipFile(BytesIO(rv.data), "r") + assert zipfile.namelist() == ["query_1.xlsx", "query_2.xlsx"] + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_with_csv_result_format_when_actor_not_permitted_for_csv__403(self): + """ + Chart data API: Test chart data with CSV result format + """ + self.logout() + self.login(username="gamma_no_csv") + self.query_context_payload["result_format"] = "csv" + + rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") + assert rv.status_code == 403 + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_with_excel_result_format_when_actor_not_permitted_for_excel__403(self): """ - Chart data API: Test chart data with table like result format + Chart data API: Test chart data with Excel result format """ self.logout() 
self.login(username="gamma_no_csv") - self.query_context_payload["result_format"] = result_format + self.query_context_payload["result_format"] = "xlsx" rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") assert rv.status_code == 403 From a83fff17c415c5ca878f0b588b1128ca488125fd Mon Sep 17 00:00:00 2001 From: EugeneTorap Date: Fri, 27 Jan 2023 12:56:35 +0300 Subject: [PATCH 4/6] Fix UI bug --- .../components/useExploreAdditionalActionsMenu/index.jsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx index a930575e43029..445db6dc44145 100644 --- a/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx +++ b/superset-frontend/src/explore/components/useExploreAdditionalActionsMenu/index.jsx @@ -328,7 +328,10 @@ export const useExploreAdditionalActionsMenu = ( > {t('Download as image')} - }> + } + > {t('Export to Excel')} From 8a18b9846ac0efbbb4f4fa1e8ddd27fac46e38c9 Mon Sep 17 00:00:00 2001 From: EugeneTorap Date: Fri, 27 Jan 2023 13:16:06 +0300 Subject: [PATCH 5/6] Upgrade xlsxwriter to 3.0.7 --- requirements/base.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index fcc5191f34ce3..400dca59d1477 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -292,7 +292,7 @@ wtforms==2.3.3 # wtforms-json wtforms-json==0.3.3 # via apache-superset -xlsxwriter==3.0.3 +xlsxwriter==3.0.7 # via apache-superset # The following packages are considered to be unsafe in a requirements file: diff --git a/setup.py b/setup.py index 74e55b67ee72d..448566d0bc635 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ def get_git_sha() -> str: "typing-extensions>=4, <5", "wtforms>=2.3.3, <2.4", "wtforms-json", - "xlsxwriter", + "xlsxwriter>=3.0.7, <3.1", ], extras_require={ "athena": ["pyathena[pandas]>=2, <3"], From 4c98433f54187377baae3f7ba20e622cff449e6a Mon Sep 17 00:00:00 2001 From: EugeneTorap Date: Fri, 27 Jan 2023 13:21:07 +0300 Subject: [PATCH 6/6] Remove excel logic from legacy API --- superset/views/core.py | 7 ------- superset/viz.py | 10 +--------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/superset/views/core.py b/superset/views/core.py index 984fe3815dcb5..d65023d600ba0 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -152,7 +152,6 @@ json_errors_response, json_success, validate_sqlatable, - XlsxResponse, ) from superset.views.log.dao import LogDAO from superset.views.sql_lab.schemas import SqlJsonPayloadSchema @@ -488,12 +487,6 @@ def generate_json( viz_obj.get_csv(), headers=generate_download_headers("csv") ) - if response_type == ChartDataResultFormat.XLSX: - return XlsxResponse( - viz_obj.get_excel(ChartDataResultFormat(response_type)), - headers=generate_download_headers("xlsx"), - ) - if response_type == ChartDataResultType.QUERY: return self.get_query_string_response(viz_obj) diff --git a/superset/viz.py b/superset/viz.py index bd2fbffecaa28..c4582925da120 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -56,7 +56,6 @@ from pandas.tseries.frequencies import to_offset from superset import app -from superset.common.chart_data import ChartDataResultFormat from superset.common.db_query_status import QueryStatus from superset.constants import NULL_STRING from superset.errors import ErrorLevel, SupersetError, 
SupersetErrorType @@ -77,7 +76,7 @@ VizData, VizPayload, ) -from superset.utils import core as utils, csv, excel +from superset.utils import core as utils, csv from superset.utils.cache import set_and_log_cache from superset.utils.core import ( apply_max_row_limit, @@ -671,13 +670,6 @@ def get_csv(self) -> Optional[str]: include_index = not isinstance(df.index, pd.RangeIndex) return csv.df_to_escaped_csv(df, index=include_index, **config["CSV_EXPORT"]) - def get_excel(self, excel_format: ChartDataResultFormat) -> Optional[bytes]: - df = self.get_df_payload()["df"] - include_index = not isinstance(df.index, pd.RangeIndex) - return excel.df_to_excel( - df, index=include_index, excel_format=excel_format, **config["EXCEL_EXPORT"] - ) - def get_data(self, df: pd.DataFrame) -> VizData: # pylint: disable=no-self-use return df.to_dict(orient="records")
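
Note on the final shape of the export path after patches 3–6: the XLS variant and the legacy `viz.py`/`views/core.py` hooks are dropped, leaving a single XLSX path built from `superset/utils/excel.py`, the `XlsxResponse` class, and the `EXCEL_EXPORT` config entry. The sketch below is not part of the patch series; it is a minimal standalone approximation of the final helper, included only to illustrate how the pieces fit together. The `sample_frame` data and the `index=False` keyword are illustrative assumptions, not values taken from the diff.

```python
# Minimal sketch of the xlsx export path introduced by this series.
# Mirrors superset/utils/excel.py (patch 3); requires pandas and xlsxwriter.
import io
from typing import Any

import pandas as pd


def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> bytes:
    """Serialize a DataFrame to xlsx bytes, as the new utility does."""
    output = io.BytesIO()
    # The xlsxwriter engine is selected explicitly, matching the patch.
    with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
        df.to_excel(writer, **kwargs)
    return output.getvalue()


if __name__ == "__main__":
    # Hypothetical data, purely for demonstration.
    sample_frame = pd.DataFrame({"name": ["Alice", "Bob"], "num_births": [120, 95]})
    payload = df_to_excel(sample_frame, index=False)
    # In superset/charts/data/api.py these bytes are returned in an XlsxResponse,
    # whose mimetype is
    # "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    # multi-query results are instead bundled as query_N.xlsx entries in a zip.
    with open("query_1.xlsx", "wb") as handle:
        handle.write(payload)
```

On the config hook: `EXCEL_EXPORT` is forwarded as keyword arguments to `DataFrame.to_excel`, so a deployment can tune options such as `sheet_name` or `float_format` the same way `CSV_EXPORT` tunes `to_csv`; the default shipped in this series only sets `encoding`.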